1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the PPCISelLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPCISelLowering.h" 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPCCallingConv.h" 17 #include "PPCCCState.h" 18 #include "PPCMachineFunctionInfo.h" 19 #include "PPCPerfectShuffle.h" 20 #include "PPCTargetMachine.h" 21 #include "PPCTargetObjectFile.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/ADT/StringSwitch.h" 25 #include "llvm/ADT/Triple.h" 26 #include "llvm/CodeGen/CallingConvLower.h" 27 #include "llvm/CodeGen/MachineFrameInfo.h" 28 #include "llvm/CodeGen/MachineFunction.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineJumpTableInfo.h" 31 #include "llvm/CodeGen/MachineLoopInfo.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/CodeGen/SelectionDAG.h" 34 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 35 #include "llvm/IR/CallingConv.h" 36 #include "llvm/IR/Constants.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Function.h" 39 #include "llvm/IR/Intrinsics.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/Format.h" 43 #include "llvm/Support/MathExtras.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Target/TargetOptions.h" 46 #include <list> 47 48 using namespace llvm; 49 50 #define DEBUG_TYPE "ppc-lowering" 51 // The static cl::opt flags below are hidden (cl::Hidden) boolean switches; each cl::desc string states what the flag disables. 52 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", 53 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); 54 55 static 
cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", 56 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); 57 58 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", 59 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); 60 61 static cl::opt<bool> DisableSCO("disable-ppc-sco", 62 cl::desc("disable sibling call optimization on ppc"), cl::Hidden); 63 64 STATISTIC(NumTailCalls, "Number of tail calls"); 65 STATISTIC(NumSiblingCalls, "Number of sibling calls"); 66 67 // FIXME: Remove this once the ANDI glue bug has been fixed! The constructor below checks this flag to mark i1 TRUNCATE as Custom. 68 extern cl::opt<bool> ANDIGlueBug; 69 70 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, 71 const PPCSubtarget &STI) 72 : TargetLowering(TM), Subtarget(STI) { 73 // Use _setjmp/_longjmp instead of setjmp/longjmp. 74 setUseUnderscoreSetJmp(true); 75 setUseUnderscoreLongJmp(true); 76 77 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all 78 // arguments are at least 4/8 bytes aligned. 79 bool isPPC64 = Subtarget.isPPC64(); 80 setMinStackArgumentAlignment(isPPC64 ? 8:4); 81 82 // Set up the register classes. 83 addRegisterClass(MVT::i32, &PPC::GPRCRegClass); 84 if (!useSoftFloat()) { 85 addRegisterClass(MVT::f32, &PPC::F4RCRegClass); 86 addRegisterClass(MVT::f64, &PPC::F8RCRegClass); 87 } 88 89 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD 90 for (MVT VT : MVT::integer_valuetypes()) { 91 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 92 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); 93 } 94 95 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 96 97 // PowerPC has pre-inc load and store's. 
98 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); 99 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); 100 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); 101 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); 102 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); 103 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); 104 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); 105 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); 106 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); 107 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); 108 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); 109 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); 110 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); 111 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); 112 113 if (Subtarget.useCRBits()) { 114 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 115 116 if (isPPC64 || Subtarget.hasFPCVT()) { 117 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); 118 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, 119 isPPC64 ? MVT::i64 : MVT::i32); 120 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); 121 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, 122 isPPC64 ? 
MVT::i64 : MVT::i32); 123 } else { 124 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); 125 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); 126 } 127 128 // PowerPC does not support direct load / store of condition registers 129 setOperationAction(ISD::LOAD, MVT::i1, Custom); 130 setOperationAction(ISD::STORE, MVT::i1, Custom); 131 132 // FIXME: Remove this once the ANDI glue bug is fixed: 133 if (ANDIGlueBug) 134 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); 135 136 for (MVT VT : MVT::integer_valuetypes()) { 137 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 138 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 139 setTruncStoreAction(VT, MVT::i1, Expand); 140 } 141 142 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); 143 } 144 145 // This is used in the ppcf128->int sequence. Note it has different semantics 146 // from FP_ROUND: that rounds to nearest, this rounds to zero. 147 setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); 148 149 // We do not currently implement these libm ops for PowerPC. 150 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); 151 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); 152 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); 153 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); 154 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); 155 setOperationAction(ISD::FREM, MVT::ppcf128, Expand); 156 157 // PowerPC has no SREM/UREM instructions 158 setOperationAction(ISD::SREM, MVT::i32, Expand); 159 setOperationAction(ISD::UREM, MVT::i32, Expand); 160 setOperationAction(ISD::SREM, MVT::i64, Expand); 161 setOperationAction(ISD::UREM, MVT::i64, Expand); 162 163 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 
164 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 165 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 166 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 167 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 168 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 169 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 170 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 171 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 172 173 // We don't support sin/cos/sqrt/fmod/pow 174 setOperationAction(ISD::FSIN , MVT::f64, Expand); 175 setOperationAction(ISD::FCOS , MVT::f64, Expand); 176 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 177 setOperationAction(ISD::FREM , MVT::f64, Expand); 178 setOperationAction(ISD::FPOW , MVT::f64, Expand); 179 setOperationAction(ISD::FMA , MVT::f64, Legal); 180 setOperationAction(ISD::FSIN , MVT::f32, Expand); 181 setOperationAction(ISD::FCOS , MVT::f32, Expand); 182 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 183 setOperationAction(ISD::FREM , MVT::f32, Expand); 184 setOperationAction(ISD::FPOW , MVT::f32, Expand); 185 setOperationAction(ISD::FMA , MVT::f32, Legal); 186 187 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 188 189 // If we're enabling GP optimizations, use hardware square root 190 if (!Subtarget.hasFSQRT() && 191 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && 192 Subtarget.hasFRE())) 193 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 194 195 if (!Subtarget.hasFSQRT() && 196 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && 197 Subtarget.hasFRES())) 198 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 199 200 if (Subtarget.hasFCPSGN()) { 201 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); 202 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); 203 } else { 204 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 205 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 206 } 207 208 if (Subtarget.hasFPRND()) { 209 setOperationAction(ISD::FFLOOR, 
MVT::f64, Legal); 210 setOperationAction(ISD::FCEIL, MVT::f64, Legal); 211 setOperationAction(ISD::FTRUNC, MVT::f64, Legal); 212 setOperationAction(ISD::FROUND, MVT::f64, Legal); 213 214 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 215 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 216 setOperationAction(ISD::FTRUNC, MVT::f32, Legal); 217 setOperationAction(ISD::FROUND, MVT::f32, Legal); 218 } 219 220 // PowerPC does not have BSWAP 221 // CTPOP or CTTZ were introduced in P8/P9 respectivelly 222 setOperationAction(ISD::BSWAP, MVT::i32 , Expand); 223 setOperationAction(ISD::BSWAP, MVT::i64 , Expand); 224 if (Subtarget.isISA3_0()) { 225 setOperationAction(ISD::CTTZ , MVT::i32 , Legal); 226 setOperationAction(ISD::CTTZ , MVT::i64 , Legal); 227 } else { 228 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 229 setOperationAction(ISD::CTTZ , MVT::i64 , Expand); 230 } 231 232 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { 233 setOperationAction(ISD::CTPOP, MVT::i32 , Legal); 234 setOperationAction(ISD::CTPOP, MVT::i64 , Legal); 235 } else { 236 setOperationAction(ISD::CTPOP, MVT::i32 , Expand); 237 setOperationAction(ISD::CTPOP, MVT::i64 , Expand); 238 } 239 240 // PowerPC does not have ROTR 241 setOperationAction(ISD::ROTR, MVT::i32 , Expand); 242 setOperationAction(ISD::ROTR, MVT::i64 , Expand); 243 244 if (!Subtarget.useCRBits()) { 245 // PowerPC does not have Select 246 setOperationAction(ISD::SELECT, MVT::i32, Expand); 247 setOperationAction(ISD::SELECT, MVT::i64, Expand); 248 setOperationAction(ISD::SELECT, MVT::f32, Expand); 249 setOperationAction(ISD::SELECT, MVT::f64, Expand); 250 } 251 252 // PowerPC wants to turn select_cc of FP into fsel when possible. 
253 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 254 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 255 256 // PowerPC wants to optimize integer setcc a bit 257 if (!Subtarget.useCRBits()) 258 setOperationAction(ISD::SETCC, MVT::i32, Custom); 259 260 // PowerPC does not have BRCOND which requires SetCC 261 if (!Subtarget.useCRBits()) 262 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 263 264 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 265 266 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 267 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 268 269 // PowerPC does not have [U|S]INT_TO_FP 270 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); 271 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); 272 273 if (Subtarget.hasDirectMove() && isPPC64) { 274 setOperationAction(ISD::BITCAST, MVT::f32, Legal); 275 setOperationAction(ISD::BITCAST, MVT::i32, Legal); 276 setOperationAction(ISD::BITCAST, MVT::i64, Legal); 277 setOperationAction(ISD::BITCAST, MVT::f64, Legal); 278 } else { 279 setOperationAction(ISD::BITCAST, MVT::f32, Expand); 280 setOperationAction(ISD::BITCAST, MVT::i32, Expand); 281 setOperationAction(ISD::BITCAST, MVT::i64, Expand); 282 setOperationAction(ISD::BITCAST, MVT::f64, Expand); 283 } 284 285 // We cannot sextinreg(i1). Expand to shifts. 286 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 287 288 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support 289 // SjLj exception handling but a light-weight setjmp/longjmp replacement to 290 // support continuation, user-level threading, and etc.. As a result, no 291 // other SjLj exception interfaces are implemented and please don't build 292 // your own exception handling based on them. 293 // LLVM/Clang supports zero-cost DWARF exception handling. 
294 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 295 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 296 297 // We want to legalize GlobalAddress and ConstantPool nodes into the 298 // appropriate instructions to materialize the address. 299 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 300 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 301 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 302 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 303 setOperationAction(ISD::JumpTable, MVT::i32, Custom); 304 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 305 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 306 setOperationAction(ISD::BlockAddress, MVT::i64, Custom); 307 setOperationAction(ISD::ConstantPool, MVT::i64, Custom); 308 setOperationAction(ISD::JumpTable, MVT::i64, Custom); 309 310 // TRAP is legal. 311 setOperationAction(ISD::TRAP, MVT::Other, Legal); 312 313 // TRAMPOLINE is custom lowered. 314 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); 315 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); 316 317 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 318 setOperationAction(ISD::VASTART , MVT::Other, Custom); 319 320 if (Subtarget.isSVR4ABI()) { 321 if (isPPC64) { 322 // VAARG always uses double-word chunks, so promote anything smaller. 323 setOperationAction(ISD::VAARG, MVT::i1, Promote); 324 AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); 325 setOperationAction(ISD::VAARG, MVT::i8, Promote); 326 AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); 327 setOperationAction(ISD::VAARG, MVT::i16, Promote); 328 AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); 329 setOperationAction(ISD::VAARG, MVT::i32, Promote); 330 AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); 331 setOperationAction(ISD::VAARG, MVT::Other, Expand); 332 } else { 333 // VAARG is custom lowered with the 32-bit SVR4 ABI. 
334 setOperationAction(ISD::VAARG, MVT::Other, Custom); 335 setOperationAction(ISD::VAARG, MVT::i64, Custom); 336 } 337 } else 338 setOperationAction(ISD::VAARG, MVT::Other, Expand); 339 340 if (Subtarget.isSVR4ABI() && !isPPC64) 341 // VACOPY is custom lowered with the 32-bit SVR4 ABI. 342 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 343 else 344 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 345 346 // Use the default implementation. 347 setOperationAction(ISD::VAEND , MVT::Other, Expand); 348 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 349 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); 350 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 351 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); 352 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); 353 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); 354 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); 355 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); 356 357 // We want to custom lower some of our intrinsics. 358 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 359 360 // To handle counter-based loop conditions. 361 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); 362 363 // Comparisons that require checking two conditions. 
364 setCondCodeAction(ISD::SETULT, MVT::f32, Expand); 365 setCondCodeAction(ISD::SETULT, MVT::f64, Expand); 366 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 367 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 368 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); 369 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); 370 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 371 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 372 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); 373 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); 374 setCondCodeAction(ISD::SETONE, MVT::f32, Expand); 375 setCondCodeAction(ISD::SETONE, MVT::f64, Expand); 376 377 if (Subtarget.has64BitSupport()) { 378 // They also have instructions for converting between i64 and fp. 379 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 380 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 381 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 382 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 383 // This is just the low 32 bits of a (signed) fp->i64 conversion. 384 // We cannot do this with Promote because i64 is not a legal type. 385 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 386 387 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) 388 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 389 } else { 390 // PowerPC does not have FP_TO_UINT on 32-bit implementations. 391 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 392 } 393 394 // With the instructions enabled under FPCVT, we can do everything. 
395 if (Subtarget.hasFPCVT()) { 396 if (Subtarget.has64BitSupport()) { 397 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 398 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 399 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 400 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 401 } 402 403 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 404 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 405 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 406 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 407 } 408 409 if (Subtarget.use64BitRegs()) { 410 // 64-bit PowerPC implementations can support i64 types directly 411 addRegisterClass(MVT::i64, &PPC::G8RCRegClass); 412 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 413 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 414 // 64-bit PowerPC wants to expand i128 shifts itself. 415 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); 416 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); 417 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); 418 } else { 419 // 32-bit PowerPC wants to expand i64 shifts itself. 420 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 421 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 422 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 423 } 424 425 if (Subtarget.hasAltivec()) { 426 // First set operation action for all vector types to expand. Then we 427 // will selectively turn on ones that can be effectively codegen'd. 428 for (MVT VT : MVT::vector_valuetypes()) { 429 // add/sub are legal for all supported vector VT's. 
430 setOperationAction(ISD::ADD, VT, Legal); 431 setOperationAction(ISD::SUB, VT, Legal); 432 433 // Vector instructions introduced in P8 434 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { 435 setOperationAction(ISD::CTPOP, VT, Legal); 436 setOperationAction(ISD::CTLZ, VT, Legal); 437 } 438 else { 439 setOperationAction(ISD::CTPOP, VT, Expand); 440 setOperationAction(ISD::CTLZ, VT, Expand); 441 } 442 443 // Vector instructions introduced in P9 444 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) 445 setOperationAction(ISD::CTTZ, VT, Legal); 446 else 447 setOperationAction(ISD::CTTZ, VT, Expand); 448 449 // We promote all shuffles to v16i8. 450 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); 451 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); 452 453 // We promote all non-typed operations to v4i32. 454 setOperationAction(ISD::AND , VT, Promote); 455 AddPromotedToType (ISD::AND , VT, MVT::v4i32); 456 setOperationAction(ISD::OR , VT, Promote); 457 AddPromotedToType (ISD::OR , VT, MVT::v4i32); 458 setOperationAction(ISD::XOR , VT, Promote); 459 AddPromotedToType (ISD::XOR , VT, MVT::v4i32); 460 setOperationAction(ISD::LOAD , VT, Promote); 461 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); 462 setOperationAction(ISD::SELECT, VT, Promote); 463 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); 464 setOperationAction(ISD::SELECT_CC, VT, Promote); 465 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); 466 setOperationAction(ISD::STORE, VT, Promote); 467 AddPromotedToType (ISD::STORE, VT, MVT::v4i32); 468 469 // No other operations are legal. 
470 setOperationAction(ISD::MUL , VT, Expand); 471 setOperationAction(ISD::SDIV, VT, Expand); 472 setOperationAction(ISD::SREM, VT, Expand); 473 setOperationAction(ISD::UDIV, VT, Expand); 474 setOperationAction(ISD::UREM, VT, Expand); 475 setOperationAction(ISD::FDIV, VT, Expand); 476 setOperationAction(ISD::FREM, VT, Expand); 477 setOperationAction(ISD::FNEG, VT, Expand); 478 setOperationAction(ISD::FSQRT, VT, Expand); 479 setOperationAction(ISD::FLOG, VT, Expand); 480 setOperationAction(ISD::FLOG10, VT, Expand); 481 setOperationAction(ISD::FLOG2, VT, Expand); 482 setOperationAction(ISD::FEXP, VT, Expand); 483 setOperationAction(ISD::FEXP2, VT, Expand); 484 setOperationAction(ISD::FSIN, VT, Expand); 485 setOperationAction(ISD::FCOS, VT, Expand); 486 setOperationAction(ISD::FABS, VT, Expand); 487 setOperationAction(ISD::FPOWI, VT, Expand); 488 setOperationAction(ISD::FFLOOR, VT, Expand); 489 setOperationAction(ISD::FCEIL, VT, Expand); 490 setOperationAction(ISD::FTRUNC, VT, Expand); 491 setOperationAction(ISD::FRINT, VT, Expand); 492 setOperationAction(ISD::FNEARBYINT, VT, Expand); 493 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); 494 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); 495 setOperationAction(ISD::BUILD_VECTOR, VT, Expand); 496 setOperationAction(ISD::MULHU, VT, Expand); 497 setOperationAction(ISD::MULHS, VT, Expand); 498 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 499 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 500 setOperationAction(ISD::UDIVREM, VT, Expand); 501 setOperationAction(ISD::SDIVREM, VT, Expand); 502 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); 503 setOperationAction(ISD::FPOW, VT, Expand); 504 setOperationAction(ISD::BSWAP, VT, Expand); 505 setOperationAction(ISD::VSELECT, VT, Expand); 506 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 507 setOperationAction(ISD::ROTL, VT, Expand); 508 setOperationAction(ISD::ROTR, VT, Expand); 509 510 for (MVT InnerVT : MVT::vector_valuetypes()) { 511 
setTruncStoreAction(VT, InnerVT, Expand); 512 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 513 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 514 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 515 } 516 } 517 518 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 519 // with merges, splats, etc. 520 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 521 522 setOperationAction(ISD::AND , MVT::v4i32, Legal); 523 setOperationAction(ISD::OR , MVT::v4i32, Legal); 524 setOperationAction(ISD::XOR , MVT::v4i32, Legal); 525 setOperationAction(ISD::LOAD , MVT::v4i32, Legal); 526 setOperationAction(ISD::SELECT, MVT::v4i32, 527 Subtarget.useCRBits() ? Legal : Expand); 528 setOperationAction(ISD::STORE , MVT::v4i32, Legal); 529 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 530 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 531 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 532 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 533 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 534 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 535 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 536 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 537 538 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); 539 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); 540 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); 541 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); 542 543 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 544 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 545 546 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { 547 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 548 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 549 } 550 551 if (Subtarget.hasP8Altivec()) 552 setOperationAction(ISD::MUL, MVT::v4i32, Legal); 553 else 554 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 555 556 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 557 setOperationAction(ISD::MUL, 
MVT::v16i8, Custom); 558 559 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 560 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 561 562 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 563 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 564 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 565 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 566 567 // Altivec does not contain unordered floating-point compare instructions 568 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); 569 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); 570 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); 571 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); 572 573 if (Subtarget.hasVSX()) { 574 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); 575 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 576 if (Subtarget.hasP8Vector()) { 577 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 578 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); 579 } 580 if (Subtarget.hasDirectMove() && isPPC64) { 581 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); 582 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); 583 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); 584 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); 585 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); 586 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); 587 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); 588 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); 589 } 590 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 591 592 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 593 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 594 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 595 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 596 setOperationAction(ISD::FROUND, MVT::v2f64, 
Legal); 597 598 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 599 600 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 601 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 602 603 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 604 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 605 606 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); 607 setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); 608 setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); 609 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 610 setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); 611 612 // Share the Altivec comparison restrictions. 613 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); 614 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); 615 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); 616 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); 617 618 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 619 setOperationAction(ISD::STORE, MVT::v2f64, Legal); 620 621 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); 622 623 if (Subtarget.hasP8Vector()) 624 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); 625 626 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); 627 628 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); 629 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); 630 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); 631 632 if (Subtarget.hasP8Altivec()) { 633 setOperationAction(ISD::SHL, MVT::v2i64, Legal); 634 setOperationAction(ISD::SRA, MVT::v2i64, Legal); 635 setOperationAction(ISD::SRL, MVT::v2i64, Legal); 636 637 setOperationAction(ISD::SETCC, MVT::v2i64, Legal); 638 } 639 else { 640 setOperationAction(ISD::SHL, MVT::v2i64, Expand); 641 setOperationAction(ISD::SRA, MVT::v2i64, Expand); 642 setOperationAction(ISD::SRL, MVT::v2i64, Expand); 643 644 setOperationAction(ISD::SETCC, MVT::v2i64, Custom); 645 646 // VSX v2i64 only supports non-arithmetic operations. 
647 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 648 setOperationAction(ISD::SUB, MVT::v2i64, Expand); 649 } 650 651 setOperationAction(ISD::LOAD, MVT::v2i64, Promote); 652 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); 653 setOperationAction(ISD::STORE, MVT::v2i64, Promote); 654 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); 655 656 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); 657 658 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 659 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 660 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 661 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 662 663 // Vector operation legalization checks the result type of 664 // SIGN_EXTEND_INREG, overall legalization checks the inner type. 665 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); 666 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); 667 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); 668 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); 669 670 setOperationAction(ISD::FNEG, MVT::v4f32, Legal); 671 setOperationAction(ISD::FNEG, MVT::v2f64, Legal); 672 setOperationAction(ISD::FABS, MVT::v4f32, Legal); 673 setOperationAction(ISD::FABS, MVT::v2f64, Legal); 674 675 if (Subtarget.hasDirectMove()) 676 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 677 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 678 679 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); 680 } 681 682 if (Subtarget.hasP8Altivec()) { 683 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); 684 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); 685 } 686 687 if (Subtarget.hasP9Vector()) { 688 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 689 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 690 } 691 } 692 693 if (Subtarget.hasQPX()) { 694 setOperationAction(ISD::FADD, MVT::v4f64, Legal); 695 setOperationAction(ISD::FSUB, MVT::v4f64, 
Legal); 696 setOperationAction(ISD::FMUL, MVT::v4f64, Legal); 697 setOperationAction(ISD::FREM, MVT::v4f64, Expand); 698 699 setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); 700 setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); 701 702 setOperationAction(ISD::LOAD , MVT::v4f64, Custom); 703 setOperationAction(ISD::STORE , MVT::v4f64, Custom); 704 705 setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); 706 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); 707 708 if (!Subtarget.useCRBits()) 709 setOperationAction(ISD::SELECT, MVT::v4f64, Expand); 710 setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); 711 712 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); 713 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); 714 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); 715 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); 716 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); 717 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); 718 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); 719 720 setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); 721 setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); 722 723 setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); 724 setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); 725 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); 726 727 setOperationAction(ISD::FNEG , MVT::v4f64, Legal); 728 setOperationAction(ISD::FABS , MVT::v4f64, Legal); 729 setOperationAction(ISD::FSIN , MVT::v4f64, Expand); 730 setOperationAction(ISD::FCOS , MVT::v4f64, Expand); 731 setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); 732 setOperationAction(ISD::FPOW , MVT::v4f64, Expand); 733 setOperationAction(ISD::FLOG , MVT::v4f64, Expand); 734 setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); 735 setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); 736 setOperationAction(ISD::FEXP , MVT::v4f64, Expand); 737 
setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); 738 739 setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); 740 setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); 741 742 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); 743 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); 744 745 addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); 746 747 setOperationAction(ISD::FADD, MVT::v4f32, Legal); 748 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 749 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 750 setOperationAction(ISD::FREM, MVT::v4f32, Expand); 751 752 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); 753 setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); 754 755 setOperationAction(ISD::LOAD , MVT::v4f32, Custom); 756 setOperationAction(ISD::STORE , MVT::v4f32, Custom); 757 758 if (!Subtarget.useCRBits()) 759 setOperationAction(ISD::SELECT, MVT::v4f32, Expand); 760 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 761 762 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); 763 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); 764 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); 765 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); 766 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); 767 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 768 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 769 770 setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); 771 setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); 772 773 setOperationAction(ISD::FNEG , MVT::v4f32, Legal); 774 setOperationAction(ISD::FABS , MVT::v4f32, Legal); 775 setOperationAction(ISD::FSIN , MVT::v4f32, Expand); 776 setOperationAction(ISD::FCOS , MVT::v4f32, Expand); 777 setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); 778 setOperationAction(ISD::FPOW , MVT::v4f32, Expand); 779 setOperationAction(ISD::FLOG , MVT::v4f32, Expand); 780 setOperationAction(ISD::FLOG2 , 
MVT::v4f32, Expand); 781 setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); 782 setOperationAction(ISD::FEXP , MVT::v4f32, Expand); 783 setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); 784 785 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); 786 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); 787 788 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); 789 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); 790 791 addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); 792 793 setOperationAction(ISD::AND , MVT::v4i1, Legal); 794 setOperationAction(ISD::OR , MVT::v4i1, Legal); 795 setOperationAction(ISD::XOR , MVT::v4i1, Legal); 796 797 if (!Subtarget.useCRBits()) 798 setOperationAction(ISD::SELECT, MVT::v4i1, Expand); 799 setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); 800 801 setOperationAction(ISD::LOAD , MVT::v4i1, Custom); 802 setOperationAction(ISD::STORE , MVT::v4i1, Custom); 803 804 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); 805 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); 806 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); 807 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); 808 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); 809 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); 810 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); 811 812 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); 813 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); 814 815 addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); 816 817 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); 818 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); 819 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); 820 setOperationAction(ISD::FROUND, MVT::v4f64, Legal); 821 822 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 823 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 824 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 825 setOperationAction(ISD::FROUND, 
MVT::v4f32, Legal); 826 827 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); 828 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); 829 830 // These need to set FE_INEXACT, and so cannot be vectorized here. 831 setOperationAction(ISD::FRINT, MVT::v4f64, Expand); 832 setOperationAction(ISD::FRINT, MVT::v4f32, Expand); 833 834 if (TM.Options.UnsafeFPMath) { 835 setOperationAction(ISD::FDIV, MVT::v4f64, Legal); 836 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); 837 838 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 839 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 840 } else { 841 setOperationAction(ISD::FDIV, MVT::v4f64, Expand); 842 setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); 843 844 setOperationAction(ISD::FDIV, MVT::v4f32, Expand); 845 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); 846 } 847 } 848 849 if (Subtarget.has64BitSupport()) 850 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 851 852 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); 853 854 if (!isPPC64) { 855 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); 856 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); 857 } 858 859 setBooleanContents(ZeroOrOneBooleanContent); 860 861 if (Subtarget.hasAltivec()) { 862 // Altivec instructions set fields to all zeros or all ones. 863 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 864 } 865 866 if (!isPPC64) { 867 // These libcalls are not available in 32-bit. 868 setLibcallName(RTLIB::SHL_I128, nullptr); 869 setLibcallName(RTLIB::SRL_I128, nullptr); 870 setLibcallName(RTLIB::SRA_I128, nullptr); 871 } 872 873 setStackPointerRegisterToSaveRestore(isPPC64 ? 
PPC::X1 : PPC::R1); 874 875 // We have target-specific dag combine patterns for the following nodes: 876 setTargetDAGCombine(ISD::SINT_TO_FP); 877 setTargetDAGCombine(ISD::BUILD_VECTOR); 878 if (Subtarget.hasFPCVT()) 879 setTargetDAGCombine(ISD::UINT_TO_FP); 880 setTargetDAGCombine(ISD::LOAD); 881 setTargetDAGCombine(ISD::STORE); 882 setTargetDAGCombine(ISD::BR_CC); 883 if (Subtarget.useCRBits()) 884 setTargetDAGCombine(ISD::BRCOND); 885 setTargetDAGCombine(ISD::BSWAP); 886 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 887 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 888 setTargetDAGCombine(ISD::INTRINSIC_VOID); 889 890 setTargetDAGCombine(ISD::SIGN_EXTEND); 891 setTargetDAGCombine(ISD::ZERO_EXTEND); 892 setTargetDAGCombine(ISD::ANY_EXTEND); 893 894 if (Subtarget.useCRBits()) { 895 setTargetDAGCombine(ISD::TRUNCATE); 896 setTargetDAGCombine(ISD::SETCC); 897 setTargetDAGCombine(ISD::SELECT_CC); 898 } 899 900 // Use reciprocal estimates. 901 if (TM.Options.UnsafeFPMath) { 902 setTargetDAGCombine(ISD::FDIV); 903 setTargetDAGCombine(ISD::FSQRT); 904 } 905 906 // Darwin long double math library functions have $LDBL128 appended. 907 if (Subtarget.isDarwin()) { 908 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); 909 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); 910 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); 911 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); 912 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); 913 setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); 914 setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); 915 setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); 916 setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); 917 setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); 918 } 919 920 // With 32 condition bits, we don't need to sink (and duplicate) compares 921 // aggressively in CodeGenPrep. 
922 if (Subtarget.useCRBits()) { 923 setHasMultipleConditionRegisters(); 924 setJumpIsExpensive(); 925 } 926 927 setMinFunctionAlignment(2); 928 if (Subtarget.isDarwin()) 929 setPrefFunctionAlignment(4); 930 931 switch (Subtarget.getDarwinDirective()) { 932 default: break; 933 case PPC::DIR_970: 934 case PPC::DIR_A2: 935 case PPC::DIR_E500mc: 936 case PPC::DIR_E5500: 937 case PPC::DIR_PWR4: 938 case PPC::DIR_PWR5: 939 case PPC::DIR_PWR5X: 940 case PPC::DIR_PWR6: 941 case PPC::DIR_PWR6X: 942 case PPC::DIR_PWR7: 943 case PPC::DIR_PWR8: 944 case PPC::DIR_PWR9: 945 setPrefFunctionAlignment(4); 946 setPrefLoopAlignment(4); 947 break; 948 } 949 950 if (Subtarget.enableMachineScheduler()) 951 setSchedulingPreference(Sched::Source); 952 else 953 setSchedulingPreference(Sched::Hybrid); 954 955 computeRegisterProperties(STI.getRegisterInfo()); 956 957 // The Freescale cores do better with aggressive inlining of memcpy and 958 // friends. GCC uses same threshold of 128 bytes (= 32 word stores). 959 if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || 960 Subtarget.getDarwinDirective() == PPC::DIR_E5500) { 961 MaxStoresPerMemset = 32; 962 MaxStoresPerMemsetOptSize = 16; 963 MaxStoresPerMemcpy = 32; 964 MaxStoresPerMemcpyOptSize = 8; 965 MaxStoresPerMemmove = 32; 966 MaxStoresPerMemmoveOptSize = 8; 967 } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { 968 // The A2 also benefits from (very) aggressive inlining of memcpy and 969 // friends. The overhead of a the function call, even when warm, can be 970 // over one hundred cycles. 971 MaxStoresPerMemset = 128; 972 MaxStoresPerMemcpy = 128; 973 MaxStoresPerMemmove = 128; 974 } 975 } 976 977 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine 978 /// the desired ByVal argument alignment. 
979 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, 980 unsigned MaxMaxAlign) { 981 if (MaxAlign == MaxMaxAlign) 982 return; 983 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { 984 if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) 985 MaxAlign = 32; 986 else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) 987 MaxAlign = 16; 988 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 989 unsigned EltAlign = 0; 990 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); 991 if (EltAlign > MaxAlign) 992 MaxAlign = EltAlign; 993 } else if (StructType *STy = dyn_cast<StructType>(Ty)) { 994 for (auto *EltTy : STy->elements()) { 995 unsigned EltAlign = 0; 996 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); 997 if (EltAlign > MaxAlign) 998 MaxAlign = EltAlign; 999 if (MaxAlign == MaxMaxAlign) 1000 break; 1001 } 1002 } 1003 } 1004 1005 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 1006 /// function arguments in the caller parameter area. 1007 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, 1008 const DataLayout &DL) const { 1009 // Darwin passes everything on 4 byte boundary. 1010 if (Subtarget.isDarwin()) 1011 return 4; 1012 1013 // 16byte and wider vectors are passed on 16byte boundary. 1014 // The rest is 8 on PPC64 and 4 on PPC32 boundary. 1015 unsigned Align = Subtarget.isPPC64() ? 8 : 4; 1016 if (Subtarget.hasAltivec() || Subtarget.hasQPX()) 1017 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 
32 : 16); 1018 return Align; 1019 } 1020 1021 bool PPCTargetLowering::useSoftFloat() const { 1022 return Subtarget.useSoftFloat(); 1023 } 1024 1025 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 1026 switch ((PPCISD::NodeType)Opcode) { 1027 case PPCISD::FIRST_NUMBER: break; 1028 case PPCISD::FSEL: return "PPCISD::FSEL"; 1029 case PPCISD::FCFID: return "PPCISD::FCFID"; 1030 case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; 1031 case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; 1032 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; 1033 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 1034 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 1035 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; 1036 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; 1037 case PPCISD::FRE: return "PPCISD::FRE"; 1038 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; 1039 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 1040 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 1041 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 1042 case PPCISD::VPERM: return "PPCISD::VPERM"; 1043 case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; 1044 case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; 1045 case PPCISD::VECSHL: return "PPCISD::VECSHL"; 1046 case PPCISD::CMPB: return "PPCISD::CMPB"; 1047 case PPCISD::Hi: return "PPCISD::Hi"; 1048 case PPCISD::Lo: return "PPCISD::Lo"; 1049 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; 1050 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; 1051 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; 1052 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 1053 case PPCISD::SRL: return "PPCISD::SRL"; 1054 case PPCISD::SRA: return "PPCISD::SRA"; 1055 case PPCISD::SHL: return "PPCISD::SHL"; 1056 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; 1057 case PPCISD::CALL: return "PPCISD::CALL"; 1058 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; 1059 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 1060 case PPCISD::BCTRL: return "PPCISD::BCTRL"; 1061 
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; 1062 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 1063 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; 1064 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; 1065 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; 1066 case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; 1067 case PPCISD::MFVSR: return "PPCISD::MFVSR"; 1068 case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; 1069 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; 1070 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; 1071 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; 1072 case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; 1073 case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; 1074 case PPCISD::VCMP: return "PPCISD::VCMP"; 1075 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 1076 case PPCISD::LBRX: return "PPCISD::LBRX"; 1077 case PPCISD::STBRX: return "PPCISD::STBRX"; 1078 case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; 1079 case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; 1080 case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; 1081 case PPCISD::STXSIX: return "PPCISD::STXSIX"; 1082 case PPCISD::VEXTS: return "PPCISD::VEXTS"; 1083 case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; 1084 case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; 1085 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; 1086 case PPCISD::BDNZ: return "PPCISD::BDNZ"; 1087 case PPCISD::BDZ: return "PPCISD::BDZ"; 1088 case PPCISD::MFFS: return "PPCISD::MFFS"; 1089 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; 1090 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; 1091 case PPCISD::CR6SET: return "PPCISD::CR6SET"; 1092 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; 1093 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; 1094 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; 1095 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; 1096 case PPCISD::LD_GOT_TPREL_L: return 
"PPCISD::LD_GOT_TPREL_L"; 1097 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; 1098 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; 1099 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; 1100 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; 1101 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; 1102 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; 1103 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; 1104 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; 1105 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; 1106 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; 1107 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 1108 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; 1109 case PPCISD::SC: return "PPCISD::SC"; 1110 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; 1111 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; 1112 case PPCISD::RFEBB: return "PPCISD::RFEBB"; 1113 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; 1114 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; 1115 case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; 1116 case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; 1117 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; 1118 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; 1119 case PPCISD::QBFLT: return "PPCISD::QBFLT"; 1120 case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; 1121 } 1122 return nullptr; 1123 } 1124 1125 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, 1126 EVT VT) const { 1127 if (!VT.isVector()) 1128 return Subtarget.useCRBits() ? 
MVT::i1 : MVT::i32; 1129 1130 if (Subtarget.hasQPX()) 1131 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); 1132 1133 return VT.changeVectorElementTypeToInteger(); 1134 } 1135 1136 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { 1137 assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); 1138 return true; 1139 } 1140 1141 //===----------------------------------------------------------------------===// 1142 // Node matching predicates, for use by the tblgen matching code. 1143 //===----------------------------------------------------------------------===// 1144 1145 /// isFloatingPointZero - Return true if this is 0.0 or -0.0. 1146 static bool isFloatingPointZero(SDValue Op) { 1147 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 1148 return CFP->getValueAPF().isZero(); 1149 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 1150 // Maybe this has already been legalized into the constant pool? 1151 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 1152 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 1153 return CFP->getValueAPF().isZero(); 1154 } 1155 return false; 1156 } 1157 1158 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 1159 /// true if Op is undef or if it matches the specified value. 1160 static bool isConstantOrUndef(int Op, int Val) { 1161 return Op < 0 || Op == Val; 1162 } 1163 1164 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 1165 /// VPKUHUM instruction. 1166 /// The ShuffleKind distinguishes between big-endian operations with 1167 /// two different inputs (0), either-endian operations with two identical 1168 /// inputs (1), and little-endian operations with two different inputs (2). 1169 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
1170 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1171 SelectionDAG &DAG) { 1172 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1173 if (ShuffleKind == 0) { 1174 if (IsLE) 1175 return false; 1176 for (unsigned i = 0; i != 16; ++i) 1177 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 1178 return false; 1179 } else if (ShuffleKind == 2) { 1180 if (!IsLE) 1181 return false; 1182 for (unsigned i = 0; i != 16; ++i) 1183 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) 1184 return false; 1185 } else if (ShuffleKind == 1) { 1186 unsigned j = IsLE ? 0 : 1; 1187 for (unsigned i = 0; i != 8; ++i) 1188 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || 1189 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) 1190 return false; 1191 } 1192 return true; 1193 } 1194 1195 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 1196 /// VPKUWUM instruction. 1197 /// The ShuffleKind distinguishes between big-endian operations with 1198 /// two different inputs (0), either-endian operations with two identical 1199 /// inputs (1), and little-endian operations with two different inputs (2). 1200 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1201 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1202 SelectionDAG &DAG) { 1203 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1204 if (ShuffleKind == 0) { 1205 if (IsLE) 1206 return false; 1207 for (unsigned i = 0; i != 16; i += 2) 1208 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 1209 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) 1210 return false; 1211 } else if (ShuffleKind == 2) { 1212 if (!IsLE) 1213 return false; 1214 for (unsigned i = 0; i != 16; i += 2) 1215 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1216 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) 1217 return false; 1218 } else if (ShuffleKind == 1) { 1219 unsigned j = IsLE ? 
0 : 2; 1220 for (unsigned i = 0; i != 8; i += 2) 1221 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1222 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1223 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1224 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) 1225 return false; 1226 } 1227 return true; 1228 } 1229 1230 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a 1231 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the 1232 /// current subtarget. 1233 /// 1234 /// The ShuffleKind distinguishes between big-endian operations with 1235 /// two different inputs (0), either-endian operations with two identical 1236 /// inputs (1), and little-endian operations with two different inputs (2). 1237 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1238 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1239 SelectionDAG &DAG) { 1240 const PPCSubtarget& Subtarget = 1241 static_cast<const PPCSubtarget&>(DAG.getSubtarget()); 1242 if (!Subtarget.hasP8Vector()) 1243 return false; 1244 1245 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1246 if (ShuffleKind == 0) { 1247 if (IsLE) 1248 return false; 1249 for (unsigned i = 0; i != 16; i += 4) 1250 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || 1251 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || 1252 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || 1253 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) 1254 return false; 1255 } else if (ShuffleKind == 2) { 1256 if (!IsLE) 1257 return false; 1258 for (unsigned i = 0; i != 16; i += 4) 1259 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1260 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || 1261 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || 1262 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) 1263 return false; 1264 } else if (ShuffleKind == 1) { 1265 unsigned j = IsLE ? 
0 : 4; 1266 for (unsigned i = 0; i != 8; i += 4) 1267 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1268 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1269 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || 1270 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || 1271 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1272 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || 1273 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || 1274 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) 1275 return false; 1276 } 1277 return true; 1278 } 1279 1280 /// isVMerge - Common function, used to match vmrg* shuffles. 1281 /// 1282 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, 1283 unsigned LHSStart, unsigned RHSStart) { 1284 if (N->getValueType(0) != MVT::v16i8) 1285 return false; 1286 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 1287 "Unsupported merge size!"); 1288 1289 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 1290 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 1291 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), 1292 LHSStart+j+i*UnitSize) || 1293 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), 1294 RHSStart+j+i*UnitSize)) 1295 return false; 1296 } 1297 return true; 1298 } 1299 1300 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 1301 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). 1302 /// The ShuffleKind distinguishes between big-endian merges with two 1303 /// different inputs (0), either-endian merges with two identical inputs (1), 1304 /// and little-endian merges with two different inputs (2). For the latter, 1305 /// the input operands are swapped (see PPCInstrAltivec.td). 
1306 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 1307 unsigned ShuffleKind, SelectionDAG &DAG) { 1308 if (DAG.getDataLayout().isLittleEndian()) { 1309 if (ShuffleKind == 1) // unary 1310 return isVMerge(N, UnitSize, 0, 0); 1311 else if (ShuffleKind == 2) // swapped 1312 return isVMerge(N, UnitSize, 0, 16); 1313 else 1314 return false; 1315 } else { 1316 if (ShuffleKind == 1) // unary 1317 return isVMerge(N, UnitSize, 8, 8); 1318 else if (ShuffleKind == 0) // normal 1319 return isVMerge(N, UnitSize, 8, 24); 1320 else 1321 return false; 1322 } 1323 } 1324 1325 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 1326 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). 1327 /// The ShuffleKind distinguishes between big-endian merges with two 1328 /// different inputs (0), either-endian merges with two identical inputs (1), 1329 /// and little-endian merges with two different inputs (2). For the latter, 1330 /// the input operands are swapped (see PPCInstrAltivec.td). 1331 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 1332 unsigned ShuffleKind, SelectionDAG &DAG) { 1333 if (DAG.getDataLayout().isLittleEndian()) { 1334 if (ShuffleKind == 1) // unary 1335 return isVMerge(N, UnitSize, 8, 8); 1336 else if (ShuffleKind == 2) // swapped 1337 return isVMerge(N, UnitSize, 8, 24); 1338 else 1339 return false; 1340 } else { 1341 if (ShuffleKind == 1) // unary 1342 return isVMerge(N, UnitSize, 0, 0); 1343 else if (ShuffleKind == 0) // normal 1344 return isVMerge(N, UnitSize, 0, 16); 1345 else 1346 return false; 1347 } 1348 } 1349 1350 /** 1351 * \brief Common function used to match vmrgew and vmrgow shuffles 1352 * 1353 * The indexOffset determines whether to look for even or odd words in 1354 * the shuffle mask. This is based on the of the endianness of the target 1355 * machine. 
1356 * - Little Endian: 1357 * - Use offset of 0 to check for odd elements 1358 * - Use offset of 4 to check for even elements 1359 * - Big Endian: 1360 * - Use offset of 0 to check for even elements 1361 * - Use offset of 4 to check for odd elements 1362 * A detailed description of the vector element ordering for little endian and 1363 * big endian can be found at 1364 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html 1365 * Targeting your applications - what little endian and big endian IBM XL C/C++ 1366 * compiler differences mean to you 1367 * 1368 * The mask to the shuffle vector instruction specifies the indices of the 1369 * elements from the two input vectors to place in the result. The elements are 1370 * numbered in array-access order, starting with the first vector. These vectors 1371 * are always of type v16i8, thus each vector will contain 16 elements of size 1372 * 8. More info on the shuffle vector can be found in the 1373 * http://llvm.org/docs/LangRef.html#shufflevector-instruction 1374 * Language Reference. 1375 * 1376 * The RHSStartValue indicates whether the same input vectors are used (unary) 1377 * or two different input vectors are used, based on the following: 1378 * - If the instruction uses the same vector for both inputs, the range of the 1379 * indices will be 0 to 15. In this case, the RHSStart value passed should 1380 * be 0. 1381 * - If the instruction has two different vectors then the range of the 1382 * indices will be 0 to 31. In this case, the RHSStart value passed should 1383 * be 16 (indices 0-15 specify elements in the first vector while indices 16 1384 * to 31 specify elements in the second vector). 
1385 * 1386 * \param[in] N The shuffle vector SD Node to analyze 1387 * \param[in] IndexOffset Specifies whether to look for even or odd elements 1388 * \param[in] RHSStartValue Specifies the starting index for the righthand input 1389 * vector to the shuffle_vector instruction 1390 * \return true iff this shuffle vector represents an even or odd word merge 1391 */ 1392 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, 1393 unsigned RHSStartValue) { 1394 if (N->getValueType(0) != MVT::v16i8) 1395 return false; 1396 1397 for (unsigned i = 0; i < 2; ++i) 1398 for (unsigned j = 0; j < 4; ++j) 1399 if (!isConstantOrUndef(N->getMaskElt(i*4+j), 1400 i*RHSStartValue+j+IndexOffset) || 1401 !isConstantOrUndef(N->getMaskElt(i*4+j+8), 1402 i*RHSStartValue+j+IndexOffset+8)) 1403 return false; 1404 return true; 1405 } 1406 1407 /** 1408 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or 1409 * vmrgow instructions. 1410 * 1411 * \param[in] N The shuffle vector SD Node to analyze 1412 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) 1413 * \param[in] ShuffleKind Identify the type of merge: 1414 * - 0 = big-endian merge with two different inputs; 1415 * - 1 = either-endian merge with two identical inputs; 1416 * - 2 = little-endian merge with two different inputs (inputs are swapped for 1417 * little-endian merges). 1418 * \param[in] DAG The current SelectionDAG 1419 * \return true iff this shuffle mask 1420 */ 1421 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, 1422 unsigned ShuffleKind, SelectionDAG &DAG) { 1423 if (DAG.getDataLayout().isLittleEndian()) { 1424 unsigned indexOffset = CheckEven ? 4 : 0; 1425 if (ShuffleKind == 1) // Unary 1426 return isVMerge(N, indexOffset, 0); 1427 else if (ShuffleKind == 2) // swapped 1428 return isVMerge(N, indexOffset, 16); 1429 else 1430 return false; 1431 } 1432 else { 1433 unsigned indexOffset = CheckEven ? 
0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2).  For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
///
/// Only v16i8 shuffles are considered.  A ShuffleKind of 0 is accepted only
/// on big-endian targets and a ShuffleKind of 2 only on little-endian ones;
/// any other combination (except kind 1) yields -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask (undef mask elements
  // are reported as negative values).
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.  An element smaller than its own position
  // cannot be the result of a left shift, so reject it.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  // From here on ShiftAmt is the distance between a mask value and its
  // position.
  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Two distinct inputs: the remaining elements must continue the run
    // ShiftAmt+i without wrapping (indices 16..31 name the second input).
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Single input: the run wraps around within the 16 bytes of that input.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // On little-endian targets the shift amount is mirrored (16 - amount).
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
///
/// \p N must be a v16i8 shuffle and \p EltSize (in bytes) must be 1, 2 or 4;
/// both are asserted.  The splatted element has to come from the first input
/// vector and has to be aligned to \p EltSize.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  // A negative (undef) first element converts to a huge unsigned value and is
  // therefore rejected here together with indices into the second vector.
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  // Every following EltSize-byte group must repeat the first group.  A group
  // whose first byte is undef is skipped as a whole.
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isXXINSERTWMask - Check whether the 16-byte shuffle mask of \p N moves
/// whole words and differs from an identity copy of one input in exactly one
/// word position, so that it can be expressed as a word insert.
///
/// On success returns true and sets:
///  - \p ShiftElts    : taken from the per-endianness shift tables below,
///                      indexed by the source word number modulo 4;
///  - \p InsertAtByte : byte offset (0, 4, 8 or 12) of the replaced word;
///  - \p Swap         : true when the replacement word comes from the first
///                      input (word index < 4), or in the single-input case.
/// \p IsLE selects the little-endian variants of the values above.
///
/// Note that in the single-input case at the end, \p ShiftElts and \p Swap are
/// written even if the function then returns false.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {

  // Check that the mask is shuffling words: each group of four bytes must
  // start at a multiple of four and be consecutive.  Undef (negative) entries
  // become large unsigned values here and fail these tests.
  for (unsigned i = 0; i < 4; ++i) {
    unsigned B0 = N->getMaskElt(i*4);
    unsigned B1 = N->getMaskElt(i*4+1);
    unsigned B2 = N->getMaskElt(i*4+2);
    unsigned B3 = N->getMaskElt(i*4+3);
    if (B0 % 4)
      return false;
    if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
      return false;
  }

  // Now we look at mask elements 0,4,8,12, converted to word indices
  // (0..3 = first input, 4..7 = second input).
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  // Only one fixed source word (2 on LE, 1 on BE) is recognised in that case,
  // with no shift.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1613 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, 1614 SelectionDAG &DAG) { 1615 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1616 assert(isSplatShuffleMask(SVOp, EltSize)); 1617 if (DAG.getDataLayout().isLittleEndian()) 1618 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); 1619 else 1620 return SVOp->getMaskElt(0) / EltSize; 1621 } 1622 1623 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 1624 /// by using a vspltis[bhw] instruction of the specified element size, return 1625 /// the constant being splatted. The ByteSize field indicates the number of 1626 /// bytes of each element [124] -> [bhw]. 1627 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 1628 SDValue OpVal(nullptr, 0); 1629 1630 // If ByteSize of the splat is bigger than the element size of the 1631 // build_vector, then we have a case where we are checking for a splat where 1632 // multiple elements of the buildvector are folded together into a single 1633 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 1634 unsigned EltSize = 16/N->getNumOperands(); 1635 if (EltSize < ByteSize) { 1636 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 1637 SDValue UniquedVals[4]; 1638 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 1639 1640 // See if all of the elements in the buildvector agree across. 1641 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1642 if (N->getOperand(i).isUndef()) continue; 1643 // If the element isn't a constant, bail fully out. 1644 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 1645 1646 1647 if (!UniquedVals[i&(Multiple-1)].getNode()) 1648 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 1649 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 1650 return SDValue(); // no match. 
1651 } 1652 1653 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 1654 // either constant or undef values that are identical for each chunk. See 1655 // if these chunks can form into a larger vspltis*. 1656 1657 // Check to see if all of the leading entries are either 0 or -1. If 1658 // neither, then this won't fit into the immediate field. 1659 bool LeadingZero = true; 1660 bool LeadingOnes = true; 1661 for (unsigned i = 0; i != Multiple-1; ++i) { 1662 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 1663 1664 LeadingZero &= isNullConstant(UniquedVals[i]); 1665 LeadingOnes &= isAllOnesConstant(UniquedVals[i]); 1666 } 1667 // Finally, check the least significant entry. 1668 if (LeadingZero) { 1669 if (!UniquedVals[Multiple-1].getNode()) 1670 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef 1671 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 1672 if (Val < 16) // 0,0,0,4 -> vspltisw(4) 1673 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1674 } 1675 if (LeadingOnes) { 1676 if (!UniquedVals[Multiple-1].getNode()) 1677 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef 1678 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 1679 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 1680 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1681 } 1682 1683 return SDValue(); 1684 } 1685 1686 // Check to see if this buildvec has a single non-undef value in its elements. 1687 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1688 if (N->getOperand(i).isUndef()) continue; 1689 if (!OpVal.getNode()) 1690 OpVal = N->getOperand(i); 1691 else if (OpVal != N->getOperand(i)) 1692 return SDValue(); 1693 } 1694 1695 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 
1696 1697 unsigned ValSizeInBytes = EltSize; 1698 uint64_t Value = 0; 1699 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1700 Value = CN->getZExtValue(); 1701 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1702 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 1703 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 1704 } 1705 1706 // If the splat value is larger than the element value, then we can never do 1707 // this splat. The only case that we could fit the replicated bits into our 1708 // immediate field for would be zero, and we prefer to use vxor for it. 1709 if (ValSizeInBytes < ByteSize) return SDValue(); 1710 1711 // If the element value is larger than the splat value, check if it consists 1712 // of a repeated bit pattern of size ByteSize. 1713 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) 1714 return SDValue(); 1715 1716 // Properly sign extend the value. 1717 int MaskVal = SignExtend32(Value, ByteSize * 8); 1718 1719 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 1720 if (MaskVal == 0) return SDValue(); 1721 1722 // Finally, if this value fits in a 5 bit sext field, return it 1723 if (SignExtend32<5>(MaskVal) == MaskVal) 1724 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); 1725 return SDValue(); 1726 } 1727 1728 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift 1729 /// amount, otherwise return -1. 1730 int PPC::isQVALIGNIShuffleMask(SDNode *N) { 1731 EVT VT = N->getValueType(0); 1732 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) 1733 return -1; 1734 1735 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1736 1737 // Find the first non-undef value in the shuffle mask. 1738 unsigned i; 1739 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) 1740 /*search*/; 1741 1742 if (i == 4) return -1; // all undef. 
1743 1744 // Otherwise, check to see if the rest of the elements are consecutively 1745 // numbered from this value. 1746 unsigned ShiftAmt = SVOp->getMaskElt(i); 1747 if (ShiftAmt < i) return -1; 1748 ShiftAmt -= i; 1749 1750 // Check the rest of the elements to see if they are consecutive. 1751 for (++i; i != 4; ++i) 1752 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1753 return -1; 1754 1755 return ShiftAmt; 1756 } 1757 1758 //===----------------------------------------------------------------------===// 1759 // Addressing Mode Selection 1760 //===----------------------------------------------------------------------===// 1761 1762 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit 1763 /// or 64-bit immediate, and if the value can be accurately represented as a 1764 /// sign extension from a 16-bit value. If so, this returns true and the 1765 /// immediate. 1766 static bool isIntS16Immediate(SDNode *N, short &Imm) { 1767 if (!isa<ConstantSDNode>(N)) 1768 return false; 1769 1770 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 1771 if (N->getValueType(0) == MVT::i32) 1772 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 1773 else 1774 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 1775 } 1776 static bool isIntS16Immediate(SDValue Op, short &Imm) { 1777 return isIntS16Immediate(Op.getNode(), Imm); 1778 } 1779 1780 /// SelectAddressRegReg - Given the specified addressed, check to see if it 1781 /// can be represented as an indexed [r+r] operation. Returns false if it 1782 /// can be more efficiently represented with [r+imm]. 
1783 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 1784 SDValue &Index, 1785 SelectionDAG &DAG) const { 1786 short imm = 0; 1787 if (N.getOpcode() == ISD::ADD) { 1788 if (isIntS16Immediate(N.getOperand(1), imm)) 1789 return false; // r+i 1790 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 1791 return false; // r+i 1792 1793 Base = N.getOperand(0); 1794 Index = N.getOperand(1); 1795 return true; 1796 } else if (N.getOpcode() == ISD::OR) { 1797 if (isIntS16Immediate(N.getOperand(1), imm)) 1798 return false; // r+i can fold it if we can. 1799 1800 // If this is an or of disjoint bitfields, we can codegen this as an add 1801 // (for better address arithmetic) if the LHS and RHS of the OR are provably 1802 // disjoint. 1803 APInt LHSKnownZero, LHSKnownOne; 1804 APInt RHSKnownZero, RHSKnownOne; 1805 DAG.computeKnownBits(N.getOperand(0), 1806 LHSKnownZero, LHSKnownOne); 1807 1808 if (LHSKnownZero.getBoolValue()) { 1809 DAG.computeKnownBits(N.getOperand(1), 1810 RHSKnownZero, RHSKnownOne); 1811 // If all of the bits are known zero on the LHS or RHS, the add won't 1812 // carry. 1813 if (~(LHSKnownZero | RHSKnownZero) == 0) { 1814 Base = N.getOperand(0); 1815 Index = N.getOperand(1); 1816 return true; 1817 } 1818 } 1819 } 1820 1821 return false; 1822 } 1823 1824 // If we happen to be doing an i64 load or store into a stack slot that has 1825 // less than a 4-byte alignment, then the frame-index elimination may need to 1826 // use an indexed load or store instruction (because the offset may not be a 1827 // multiple of 4). The extra register needed to hold the offset comes from the 1828 // register scavenger, and it is possible that the scavenger will need to use 1829 // an emergency spill slot. As a result, we need to make sure that a spill slot 1830 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned 1831 // stack slot. 
1832 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { 1833 // FIXME: This does not handle the LWA case. 1834 if (VT != MVT::i64) 1835 return; 1836 1837 // NOTE: We'll exclude negative FIs here, which come from argument 1838 // lowering, because there are no known test cases triggering this problem 1839 // using packed structures (or similar). We can remove this exclusion if 1840 // we find such a test case. The reason why this is so test-case driven is 1841 // because this entire 'fixup' is only to prevent crashes (from the 1842 // register scavenger) on not-really-valid inputs. For example, if we have: 1843 // %a = alloca i1 1844 // %b = bitcast i1* %a to i64* 1845 // store i64* a, i64 b 1846 // then the store should really be marked as 'align 1', but is not. If it 1847 // were marked as 'align 1' then the indexed form would have been 1848 // instruction-selected initially, and the problem this 'fixup' is preventing 1849 // won't happen regardless. 1850 if (FrameIdx < 0) 1851 return; 1852 1853 MachineFunction &MF = DAG.getMachineFunction(); 1854 MachineFrameInfo &MFI = MF.getFrameInfo(); 1855 1856 unsigned Align = MFI.getObjectAlignment(FrameIdx); 1857 if (Align >= 4) 1858 return; 1859 1860 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1861 FuncInfo->setHasNonRISpills(); 1862 } 1863 1864 /// Returns true if the address N can be represented by a base register plus 1865 /// a signed 16-bit displacement [r+imm], and if it is not better 1866 /// represented as reg+reg. If Aligned is true, only accept displacements 1867 /// suitable for STD and friends, i.e. multiples of 4. 1868 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 1869 SDValue &Base, 1870 SelectionDAG &DAG, 1871 bool Aligned) const { 1872 // FIXME dl should come from parent load or store, not from address 1873 SDLoc dl(N); 1874 // If this can be more profitably realized as r+r, fail. 
1875 if (SelectAddressRegReg(N, Disp, Base, DAG)) 1876 return false; 1877 1878 if (N.getOpcode() == ISD::ADD) { 1879 short imm = 0; 1880 if (isIntS16Immediate(N.getOperand(1), imm) && 1881 (!Aligned || (imm & 3) == 0)) { 1882 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1883 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1884 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1885 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1886 } else { 1887 Base = N.getOperand(0); 1888 } 1889 return true; // [r+i] 1890 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 1891 // Match LOAD (ADD (X, Lo(G))). 1892 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 1893 && "Cannot handle constant offsets yet!"); 1894 Disp = N.getOperand(1).getOperand(0); // The global address. 1895 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 1896 Disp.getOpcode() == ISD::TargetGlobalTLSAddress || 1897 Disp.getOpcode() == ISD::TargetConstantPool || 1898 Disp.getOpcode() == ISD::TargetJumpTable); 1899 Base = N.getOperand(0); 1900 return true; // [&g+r] 1901 } 1902 } else if (N.getOpcode() == ISD::OR) { 1903 short imm = 0; 1904 if (isIntS16Immediate(N.getOperand(1), imm) && 1905 (!Aligned || (imm & 3) == 0)) { 1906 // If this is an or of disjoint bitfields, we can codegen this as an add 1907 // (for better address arithmetic) if the LHS and RHS of the OR are 1908 // provably disjoint. 1909 APInt LHSKnownZero, LHSKnownOne; 1910 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 1911 1912 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 1913 // If all of the bits are known zero on the LHS or RHS, the add won't 1914 // carry. 
1915 if (FrameIndexSDNode *FI = 1916 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1917 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1918 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1919 } else { 1920 Base = N.getOperand(0); 1921 } 1922 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1923 return true; 1924 } 1925 } 1926 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1927 // Loading from a constant address. 1928 1929 // If this address fits entirely in a 16-bit sext immediate field, codegen 1930 // this as "d, 0" 1931 short Imm; 1932 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { 1933 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); 1934 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1935 CN->getValueType(0)); 1936 return true; 1937 } 1938 1939 // Handle 32-bit sext immediates with LIS + addr mode. 1940 if ((CN->getValueType(0) == MVT::i32 || 1941 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && 1942 (!Aligned || (CN->getZExtValue() & 3) == 0)) { 1943 int Addr = (int)CN->getZExtValue(); 1944 1945 // Otherwise, break this down into an LIS + disp. 1946 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); 1947 1948 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, 1949 MVT::i32); 1950 unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; 1951 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); 1952 return true; 1953 } 1954 } 1955 1956 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); 1957 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { 1958 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1959 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1960 } else 1961 Base = N; 1962 return true; // [r+0] 1963 } 1964 1965 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be 1966 /// represented as an indexed [r+r] operation. 1967 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 1968 SDValue &Index, 1969 SelectionDAG &DAG) const { 1970 // Check to see if we can easily represent this as an [r+r] address. This 1971 // will fail if it thinks that the address is more profitably represented as 1972 // reg+imm, e.g. where imm = 0. 1973 if (SelectAddressRegReg(N, Base, Index, DAG)) 1974 return true; 1975 1976 // If the operand is an addition, always emit this as [r+r], since this is 1977 // better (for code size, and execution, as the memop does the add for free) 1978 // than emitting an explicit add. 1979 if (N.getOpcode() == ISD::ADD) { 1980 Base = N.getOperand(0); 1981 Index = N.getOperand(1); 1982 return true; 1983 } 1984 1985 // Otherwise, do it the hard way, using R0 as the base register. 1986 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1987 N.getValueType()); 1988 Index = N; 1989 return true; 1990 } 1991 1992 /// getPreIndexedAddressParts - returns true by value, base pointer and 1993 /// offset pointer and addressing mode by reference if the node's address 1994 /// can be legally represented as pre-indexed load / store address. 
1995 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 1996 SDValue &Offset, 1997 ISD::MemIndexedMode &AM, 1998 SelectionDAG &DAG) const { 1999 if (DisablePPCPreinc) return false; 2000 2001 bool isLoad = true; 2002 SDValue Ptr; 2003 EVT VT; 2004 unsigned Alignment; 2005 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 2006 Ptr = LD->getBasePtr(); 2007 VT = LD->getMemoryVT(); 2008 Alignment = LD->getAlignment(); 2009 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 2010 Ptr = ST->getBasePtr(); 2011 VT = ST->getMemoryVT(); 2012 Alignment = ST->getAlignment(); 2013 isLoad = false; 2014 } else 2015 return false; 2016 2017 // PowerPC doesn't have preinc load/store instructions for vectors (except 2018 // for QPX, which does have preinc r+r forms). 2019 if (VT.isVector()) { 2020 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { 2021 return false; 2022 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { 2023 AM = ISD::PRE_INC; 2024 return true; 2025 } 2026 } 2027 2028 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { 2029 2030 // Common code will reject creating a pre-inc form if the base pointer 2031 // is a frame index, or if N is a store and the base pointer is either 2032 // the same as or a predecessor of the value being stored. Check for 2033 // those situations here, and try with swapped Base/Offset instead. 2034 bool Swap = false; 2035 2036 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) 2037 Swap = true; 2038 else if (!isLoad) { 2039 SDValue Val = cast<StoreSDNode>(N)->getValue(); 2040 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) 2041 Swap = true; 2042 } 2043 2044 if (Swap) 2045 std::swap(Base, Offset); 2046 2047 AM = ISD::PRE_INC; 2048 return true; 2049 } 2050 2051 // LDU/STU can only handle immediates that are a multiple of 4. 
2052 if (VT != MVT::i64) { 2053 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) 2054 return false; 2055 } else { 2056 // LDU/STU need an address with at least 4-byte alignment. 2057 if (Alignment < 4) 2058 return false; 2059 2060 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) 2061 return false; 2062 } 2063 2064 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 2065 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 2066 // sext i32 to i64 when addr mode is r+i. 2067 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 2068 LD->getExtensionType() == ISD::SEXTLOAD && 2069 isa<ConstantSDNode>(Offset)) 2070 return false; 2071 } 2072 2073 AM = ISD::PRE_INC; 2074 return true; 2075 } 2076 2077 //===----------------------------------------------------------------------===// 2078 // LowerOperation implementation 2079 //===----------------------------------------------------------------------===// 2080 2081 /// Return true if we should reference labels using a PICBase, set the HiOpFlags 2082 /// and LoOpFlags to the target MO flags. 2083 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, 2084 unsigned &HiOpFlags, unsigned &LoOpFlags, 2085 const GlobalValue *GV = nullptr) { 2086 HiOpFlags = PPCII::MO_HA; 2087 LoOpFlags = PPCII::MO_LO; 2088 2089 // Don't use the pic base if not in PIC relocation model. 2090 if (IsPIC) { 2091 HiOpFlags |= PPCII::MO_PIC_FLAG; 2092 LoOpFlags |= PPCII::MO_PIC_FLAG; 2093 } 2094 2095 // If this is a reference to a global value that requires a non-lazy-ptr, make 2096 // sure that instruction lowering adds it. 
2097 if (GV && Subtarget.hasLazyResolverStub(GV)) { 2098 HiOpFlags |= PPCII::MO_NLP_FLAG; 2099 LoOpFlags |= PPCII::MO_NLP_FLAG; 2100 2101 if (GV->hasHiddenVisibility()) { 2102 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2103 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2104 } 2105 } 2106 } 2107 2108 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 2109 SelectionDAG &DAG) { 2110 SDLoc DL(HiPart); 2111 EVT PtrVT = HiPart.getValueType(); 2112 SDValue Zero = DAG.getConstant(0, DL, PtrVT); 2113 2114 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 2115 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 2116 2117 // With PIC, the first instruction is actually "GR+hi(&G)". 2118 if (isPIC) 2119 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 2120 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 2121 2122 // Generate non-pic code that has direct accesses to the constant pool. 2123 // The address of the global is just (hi(&g)+lo(&g)). 2124 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 2125 } 2126 2127 static void setUsesTOCBasePtr(MachineFunction &MF) { 2128 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2129 FuncInfo->setUsesTOCBasePtr(); 2130 } 2131 2132 static void setUsesTOCBasePtr(SelectionDAG &DAG) { 2133 setUsesTOCBasePtr(DAG.getMachineFunction()); 2134 } 2135 2136 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, 2137 SDValue GA) { 2138 EVT VT = Is64Bit ? MVT::i64 : MVT::i32; 2139 SDValue Reg = Is64Bit ? 
DAG.getRegister(PPC::X2, VT) : 2140 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); 2141 2142 SDValue Ops[] = { GA, Reg }; 2143 return DAG.getMemIntrinsicNode( 2144 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, 2145 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, 2146 false, 0); 2147 } 2148 2149 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 2150 SelectionDAG &DAG) const { 2151 EVT PtrVT = Op.getValueType(); 2152 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2153 const Constant *C = CP->getConstVal(); 2154 2155 // 64-bit SVR4 ABI code is always position-independent. 2156 // The actual address of the GlobalValue is stored in the TOC. 2157 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2158 setUsesTOCBasePtr(DAG); 2159 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 2160 return getTOCEntry(DAG, SDLoc(CP), true, GA); 2161 } 2162 2163 unsigned MOHiFlag, MOLoFlag; 2164 bool IsPIC = isPositionIndependent(); 2165 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2166 2167 if (IsPIC && Subtarget.isSVR4ABI()) { 2168 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 2169 PPCII::MO_PIC_FLAG); 2170 return getTOCEntry(DAG, SDLoc(CP), false, GA); 2171 } 2172 2173 SDValue CPIHi = 2174 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 2175 SDValue CPILo = 2176 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 2177 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); 2178 } 2179 2180 // For 64-bit PowerPC, prefer the more compact relative encodings. 2181 // This trades 32 bits per jump table entry for one or two instructions 2182 // on the jump site. 
unsigned PPCTargetLowering::getJumpTableEncoding() const {
  // Relative jump tables store 32-bit label differences.
  if (isJumpTableRelative())
    return MachineJumpTableInfo::EK_LabelDifference32;

  return TargetLowering::getJumpTableEncoding();
}

/// Jump tables are always relative on 64-bit subtargets; otherwise the
/// generic TargetLowering decision is used.
bool PPCTargetLowering::isJumpTableRelative() const {
  if (Subtarget.isPPC64())
    return true;
  return TargetLowering::isJumpTableRelative();
}

/// Return the node that relative jump table entries are based on.  The generic
/// implementation is used for 32-bit subtargets and for the default, small
/// and medium code models; every other code model uses a GlobalBaseReg node.
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.isPPC64())
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Default:
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
  default:
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  }
}

/// MC-level counterpart of getPICJumpTableRelocBase: same case split, but
/// the non-generic case returns a reference to the function's PIC base symbol.
const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                                unsigned JTI,
                                                MCContext &Ctx) const {
  if (!Subtarget.isPPC64())
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Default:
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
  default:
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
  }
}

/// Lower a JumpTable node to a TOC entry (64-bit SVR4, or SVR4 with PIC) or
/// to an explicit hi/lo address computation.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the jump table is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    // NOTE(review): the location is taken from the freshly created target
    // node here, not from JT as in the branch above -- confirm intended.
    return getTOCEntry(DAG, SDLoc(GA), false, GA);
  }

  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}

/// Lower a BlockAddress node to a TOC entry (64-bit SVR4) or to an explicit
/// hi/lo address computation.
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  // NOTE(review): this path uses an offset of 0 while the TOC path above
  // forwards BASDN->getOffset() -- confirm a non-zero offset cannot reach it.
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}

/// Lower a GlobalTLSAddress node.  Emulated TLS is delegated to
/// LowerToTLSEmulatedModel; otherwise one node sequence is built per TLS
/// model (local-exec, initial-exec, general-dynamic, local-dynamic).
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {

  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().Options.EmulatedTLS)
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  // Local-exec: Lo(tprel_lo(GV), Hi(tprel_ha(GV), thread pointer)), where the
  // thread pointer register is X13 on 64-bit and R2 on 32-bit.
  if (Model == TLSModel::LocalExec) {
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                     is64bit ? MVT::i64 : MVT::i32);
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  // Initial-exec: load the thread-pointer offset via a GOT pointer
  // (LD_GOT_TPREL_L) and combine it with the MO_TLS-flagged address using
  // ADD_TLS.
  if (Model == TLSModel::InitialExec) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  // General-dynamic: a single ADDI_TLSGD_L_ADDR node on top of the GOT
  // pointer.  On 32-bit the GOT pointer node depends on the module PIC level.
  if (Model == TLSModel::GeneralDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  // Local-dynamic: ADDI_TLSLD_L_ADDR on top of the GOT pointer, followed by
  // the high (ADDIS_DTPREL_HA) and low (ADDI_DTPREL_L) offset nodes.
  if (Model == TLSModel::LocalDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}

/// Lower a GlobalAddress node to a TOC entry (64-bit SVR4, or SVR4 with PIC)
/// or to an explicit hi/lo address computation, followed by a load when the
/// reference goes through a non-lazy pointer.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return getTOCEntry(DAG, DL, true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, false, GA);
  }

  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do an
  // extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  return Ptr;
}

SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
2418 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2419 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, 2420 DAG.getSetCC(dl, MVT::v4i32, 2421 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), 2422 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), 2423 CC)); 2424 } 2425 2426 return SDValue(); 2427 } 2428 2429 // We handle most of these in the usual way. 2430 return Op; 2431 } 2432 2433 // If we're comparing for equality to zero, expose the fact that this is 2434 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can 2435 // fold the new nodes. 2436 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG)) 2437 return V; 2438 2439 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 2440 // Leave comparisons against 0 and -1 alone for now, since they're usually 2441 // optimized. FIXME: revisit this when we can custom lower all setcc 2442 // optimizations. 2443 if (C->isAllOnesValue() || C->isNullValue()) 2444 return SDValue(); 2445 } 2446 2447 // If we have an integer seteq/setne, turn it into a compare against zero 2448 // by xor'ing the rhs with the lhs, which is faster than setting a 2449 // condition register, reading it back out, and masking the correct bit. The 2450 // normal approach here uses sub to do this instead of xor. Using xor exposes 2451 // the result to other bit-twiddling opportunities. 
2452 EVT LHSVT = Op.getOperand(0).getValueType(); 2453 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 2454 EVT VT = Op.getValueType(); 2455 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 2456 Op.getOperand(1)); 2457 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); 2458 } 2459 return SDValue(); 2460 } 2461 2462 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { 2463 SDNode *Node = Op.getNode(); 2464 EVT VT = Node->getValueType(0); 2465 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2466 SDValue InChain = Node->getOperand(0); 2467 SDValue VAListPtr = Node->getOperand(1); 2468 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2469 SDLoc dl(Node); 2470 2471 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 2472 2473 // gpr_index 2474 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2475 VAListPtr, MachinePointerInfo(SV), MVT::i8); 2476 InChain = GprIndex.getValue(1); 2477 2478 if (VT == MVT::i64) { 2479 // Check if GprIndex is even 2480 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 2481 DAG.getConstant(1, dl, MVT::i32)); 2482 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 2483 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); 2484 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 2485 DAG.getConstant(1, dl, MVT::i32)); 2486 // Align GprIndex to be even if it isn't 2487 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 2488 GprIndex); 2489 } 2490 2491 // fpr index is 1 byte after gpr 2492 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2493 DAG.getConstant(1, dl, MVT::i32)); 2494 2495 // fpr 2496 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2497 FprPtr, MachinePointerInfo(SV), MVT::i8); 2498 InChain = FprIndex.getValue(1); 2499 2500 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2501 DAG.getConstant(8, dl, 
MVT::i32)); 2502 2503 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2504 DAG.getConstant(4, dl, MVT::i32)); 2505 2506 // areas 2507 SDValue OverflowArea = 2508 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); 2509 InChain = OverflowArea.getValue(1); 2510 2511 SDValue RegSaveArea = 2512 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); 2513 InChain = RegSaveArea.getValue(1); 2514 2515 // select overflow_area if index > 8 2516 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 2517 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); 2518 2519 // adjustment constant gpr_index * 4/8 2520 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 2521 VT.isInteger() ? GprIndex : FprIndex, 2522 DAG.getConstant(VT.isInteger() ? 4 : 8, dl, 2523 MVT::i32)); 2524 2525 // OurReg = RegSaveArea + RegConstant 2526 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 2527 RegConstant); 2528 2529 // Floating types are 32 bytes into RegSaveArea 2530 if (VT.isFloatingPoint()) 2531 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 2532 DAG.getConstant(32, dl, MVT::i32)); 2533 2534 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 2535 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 2536 VT.isInteger() ? GprIndex : FprIndex, 2537 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, 2538 MVT::i32)); 2539 2540 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 2541 VT.isInteger() ? VAListPtr : FprPtr, 2542 MachinePointerInfo(SV), MVT::i8); 2543 2544 // determine if we should load from reg_save_area or overflow_area 2545 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 2546 2547 // increase overflow_area by 4/8 if gpr/fpr > 8 2548 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 2549 DAG.getConstant(VT.isInteger() ? 
4 : 8, 2550 dl, MVT::i32)); 2551 2552 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 2553 OverflowAreaPlusN); 2554 2555 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, 2556 MachinePointerInfo(), MVT::i32); 2557 2558 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); 2559 } 2560 2561 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { 2562 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 2563 2564 // We have to copy the entire va_list struct: 2565 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 2566 return DAG.getMemcpy(Op.getOperand(0), Op, 2567 Op.getOperand(1), Op.getOperand(2), 2568 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, 2569 false, MachinePointerInfo(), MachinePointerInfo()); 2570 } 2571 2572 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 2573 SelectionDAG &DAG) const { 2574 return Op.getOperand(0); 2575 } 2576 2577 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 2578 SelectionDAG &DAG) const { 2579 SDValue Chain = Op.getOperand(0); 2580 SDValue Trmp = Op.getOperand(1); // trampoline 2581 SDValue FPtr = Op.getOperand(2); // nested function 2582 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 2583 SDLoc dl(Op); 2584 2585 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2586 bool isPPC64 = (PtrVT == MVT::i64); 2587 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 2588 2589 TargetLowering::ArgListTy Args; 2590 TargetLowering::ArgListEntry Entry; 2591 2592 Entry.Ty = IntPtrTy; 2593 Entry.Node = Trmp; Args.push_back(Entry); 2594 2595 // TrampSize == (isPPC64 ? 48 : 40); 2596 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, 2597 isPPC64 ? 
MVT::i64 : MVT::i32); 2598 Args.push_back(Entry); 2599 2600 Entry.Node = FPtr; Args.push_back(Entry); 2601 Entry.Node = Nest; Args.push_back(Entry); 2602 2603 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 2604 TargetLowering::CallLoweringInfo CLI(DAG); 2605 CLI.setDebugLoc(dl).setChain(Chain) 2606 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), 2607 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 2608 std::move(Args)); 2609 2610 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2611 return CallResult.second; 2612 } 2613 2614 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { 2615 MachineFunction &MF = DAG.getMachineFunction(); 2616 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2617 EVT PtrVT = getPointerTy(MF.getDataLayout()); 2618 2619 SDLoc dl(Op); 2620 2621 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 2622 // vastart just stores the address of the VarArgsFrameIndex slot into the 2623 // memory location argument. 2624 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2625 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2626 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2627 MachinePointerInfo(SV)); 2628 } 2629 2630 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 2631 // We suppose the given va_list is already allocated. 2632 // 2633 // typedef struct { 2634 // char gpr; /* index into the array of 8 GPRs 2635 // * stored in the register save area 2636 // * gpr=0 corresponds to r3, 2637 // * gpr=1 to r4, etc. 2638 // */ 2639 // char fpr; /* index into the array of 8 FPRs 2640 // * stored in the register save area 2641 // * fpr=0 corresponds to f1, 2642 // * fpr=1 to f2, etc. 
2643 // */ 2644 // char *overflow_arg_area; 2645 // /* location on stack that holds 2646 // * the next overflow argument 2647 // */ 2648 // char *reg_save_area; 2649 // /* where r3:r10 and f1:f8 (if saved) 2650 // * are stored 2651 // */ 2652 // } va_list[1]; 2653 2654 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); 2655 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); 2656 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 2657 PtrVT); 2658 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2659 PtrVT); 2660 2661 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 2662 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); 2663 2664 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 2665 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); 2666 2667 uint64_t FPROffset = 1; 2668 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); 2669 2670 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2671 2672 // Store first byte : number of int regs 2673 SDValue firstStore = 2674 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), 2675 MachinePointerInfo(SV), MVT::i8); 2676 uint64_t nextOffset = FPROffset; 2677 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 2678 ConstFPROffset); 2679 2680 // Store second byte : number of float regs 2681 SDValue secondStore = 2682 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 2683 MachinePointerInfo(SV, nextOffset), MVT::i8); 2684 nextOffset += StackOffset; 2685 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 2686 2687 // Store second word : arguments given on stack 2688 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 2689 MachinePointerInfo(SV, nextOffset)); 2690 nextOffset += FrameOffset; 2691 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 2692 2693 // Store third word : 
arguments given in registers 2694 return DAG.getStore(thirdStore, dl, FR, nextPtr, 2695 MachinePointerInfo(SV, nextOffset)); 2696 } 2697 2698 #include "PPCGenCallingConv.inc" 2699 2700 // Function whose sole purpose is to kill compiler warnings 2701 // stemming from unused functions included from PPCGenCallingConv.inc. 2702 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { 2703 return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; 2704 } 2705 2706 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 2707 CCValAssign::LocInfo &LocInfo, 2708 ISD::ArgFlagsTy &ArgFlags, 2709 CCState &State) { 2710 return true; 2711 } 2712 2713 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 2714 MVT &LocVT, 2715 CCValAssign::LocInfo &LocInfo, 2716 ISD::ArgFlagsTy &ArgFlags, 2717 CCState &State) { 2718 static const MCPhysReg ArgRegs[] = { 2719 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2720 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2721 }; 2722 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2723 2724 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2725 2726 // Skip one register if the first unallocated register has an even register 2727 // number and there are still argument registers available which have not been 2728 // allocated yet. RegNum is actually an index into ArgRegs, which means we 2729 // need to skip a register if RegNum is odd. 2730 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 2731 State.AllocateReg(ArgRegs[RegNum]); 2732 } 2733 2734 // Always return false here, as this function only makes sure that the first 2735 // unallocated register has an odd register number and does not actually 2736 // allocate a register for the current argument. 
2737 return false; 2738 } 2739 2740 bool 2741 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, 2742 MVT &LocVT, 2743 CCValAssign::LocInfo &LocInfo, 2744 ISD::ArgFlagsTy &ArgFlags, 2745 CCState &State) { 2746 static const MCPhysReg ArgRegs[] = { 2747 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2748 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2749 }; 2750 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2751 2752 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2753 int RegsLeft = NumArgRegs - RegNum; 2754 2755 // Skip if there is not enough registers left for long double type (4 gpr regs 2756 // in soft float mode) and put long double argument on the stack. 2757 if (RegNum != NumArgRegs && RegsLeft < 4) { 2758 for (int i = 0; i < RegsLeft; i++) { 2759 State.AllocateReg(ArgRegs[RegNum + i]); 2760 } 2761 } 2762 2763 return false; 2764 } 2765 2766 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 2767 MVT &LocVT, 2768 CCValAssign::LocInfo &LocInfo, 2769 ISD::ArgFlagsTy &ArgFlags, 2770 CCState &State) { 2771 static const MCPhysReg ArgRegs[] = { 2772 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2773 PPC::F8 2774 }; 2775 2776 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2777 2778 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2779 2780 // If there is only one Floating-point register left we need to put both f64 2781 // values of a split ppc_fp128 value on the stack. 2782 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 2783 State.AllocateReg(ArgRegs[RegNum]); 2784 } 2785 2786 // Always return false here, as this function only makes sure that the two f64 2787 // values a ppc_fp128 value is split into are both passed in registers or both 2788 // passed on the stack and does not actually allocate a register for the 2789 // current argument. 2790 return false; 2791 } 2792 2793 /// FPR - The set of FP registers that should be allocated for arguments, 2794 /// on Darwin. 
2795 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, 2796 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, 2797 PPC::F11, PPC::F12, PPC::F13}; 2798 2799 /// QFPR - The set of QPX registers that should be allocated for arguments. 2800 static const MCPhysReg QFPR[] = { 2801 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, 2802 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; 2803 2804 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 2805 /// the stack. 2806 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2807 unsigned PtrByteSize) { 2808 unsigned ArgSize = ArgVT.getStoreSize(); 2809 if (Flags.isByVal()) 2810 ArgSize = Flags.getByValSize(); 2811 2812 // Round up to multiples of the pointer size, except for array members, 2813 // which are always packed. 2814 if (!Flags.isInConsecutiveRegs()) 2815 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2816 2817 return ArgSize; 2818 } 2819 2820 /// CalculateStackSlotAlignment - Calculates the alignment of this argument 2821 /// on the stack. 2822 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 2823 ISD::ArgFlagsTy Flags, 2824 unsigned PtrByteSize) { 2825 unsigned Align = PtrByteSize; 2826 2827 // Altivec parameters are padded to a 16 byte boundary. 2828 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2829 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2830 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2831 ArgVT == MVT::v1i128) 2832 Align = 16; 2833 // QPX vector types stored in double-precision are padded to a 32 byte 2834 // boundary. 2835 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) 2836 Align = 32; 2837 2838 // ByVal parameters are aligned as requested. 
2839 if (Flags.isByVal()) { 2840 unsigned BVAlign = Flags.getByValAlign(); 2841 if (BVAlign > PtrByteSize) { 2842 if (BVAlign % PtrByteSize != 0) 2843 llvm_unreachable( 2844 "ByVal alignment is not a multiple of the pointer size"); 2845 2846 Align = BVAlign; 2847 } 2848 } 2849 2850 // Array members are always packed to their original alignment. 2851 if (Flags.isInConsecutiveRegs()) { 2852 // If the array member was split into multiple registers, the first 2853 // needs to be aligned to the size of the full type. (Except for 2854 // ppcf128, which is only aligned as its f64 components.) 2855 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 2856 Align = OrigVT.getStoreSize(); 2857 else 2858 Align = ArgVT.getStoreSize(); 2859 } 2860 2861 return Align; 2862 } 2863 2864 /// CalculateStackSlotUsed - Return whether this argument will use its 2865 /// stack slot (instead of being passed in registers). ArgOffset, 2866 /// AvailableFPRs, and AvailableVRs must hold the current argument 2867 /// position, and will be updated to account for this argument. 2868 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 2869 ISD::ArgFlagsTy Flags, 2870 unsigned PtrByteSize, 2871 unsigned LinkageSize, 2872 unsigned ParamAreaSize, 2873 unsigned &ArgOffset, 2874 unsigned &AvailableFPRs, 2875 unsigned &AvailableVRs, bool HasQPX) { 2876 bool UseMemory = false; 2877 2878 // Respect alignment of argument on the stack. 2879 unsigned Align = 2880 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 2881 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2882 // If there's no space left in the argument save area, we must 2883 // use memory (this check also catches zero-sized arguments). 2884 if (ArgOffset >= LinkageSize + ParamAreaSize) 2885 UseMemory = true; 2886 2887 // Allocate argument on the stack. 
2888 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2889 if (Flags.isInConsecutiveRegsLast()) 2890 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2891 // If we overran the argument save area, we must use memory 2892 // (this check catches arguments passed partially in memory) 2893 if (ArgOffset > LinkageSize + ParamAreaSize) 2894 UseMemory = true; 2895 2896 // However, if the argument is actually passed in an FPR or a VR, 2897 // we don't use memory after all. 2898 if (!Flags.isByVal()) { 2899 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || 2900 // QPX registers overlap with the scalar FP registers. 2901 (HasQPX && (ArgVT == MVT::v4f32 || 2902 ArgVT == MVT::v4f64 || 2903 ArgVT == MVT::v4i1))) 2904 if (AvailableFPRs > 0) { 2905 --AvailableFPRs; 2906 return false; 2907 } 2908 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2909 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2910 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2911 ArgVT == MVT::v1i128) 2912 if (AvailableVRs > 0) { 2913 --AvailableVRs; 2914 return false; 2915 } 2916 } 2917 2918 return UseMemory; 2919 } 2920 2921 /// EnsureStackAlignment - Round stack frame size up from NumBytes to 2922 /// ensure minimum alignment required for target. 
2923 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, 2924 unsigned NumBytes) { 2925 unsigned TargetAlign = Lowering->getStackAlignment(); 2926 unsigned AlignMask = TargetAlign - 1; 2927 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2928 return NumBytes; 2929 } 2930 2931 SDValue PPCTargetLowering::LowerFormalArguments( 2932 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2933 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2934 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2935 if (Subtarget.isSVR4ABI()) { 2936 if (Subtarget.isPPC64()) 2937 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 2938 dl, DAG, InVals); 2939 else 2940 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 2941 dl, DAG, InVals); 2942 } else { 2943 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 2944 dl, DAG, InVals); 2945 } 2946 } 2947 2948 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( 2949 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2950 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2951 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2952 2953 // 32-bit SVR4 ABI Stack Frame Layout: 2954 // +-----------------------------------+ 2955 // +--> | Back chain | 2956 // | +-----------------------------------+ 2957 // | | Floating-point register save area | 2958 // | +-----------------------------------+ 2959 // | | General register save area | 2960 // | +-----------------------------------+ 2961 // | | CR save word | 2962 // | +-----------------------------------+ 2963 // | | VRSAVE save word | 2964 // | +-----------------------------------+ 2965 // | | Alignment padding | 2966 // | +-----------------------------------+ 2967 // | | Vector register save area | 2968 // | +-----------------------------------+ 2969 // | | Local variable space | 2970 // | +-----------------------------------+ 2971 // | | Parameter list area | 2972 // | 
+-----------------------------------+ 2973 // | | LR save word | 2974 // | +-----------------------------------+ 2975 // SP--> +--- | Back chain | 2976 // +-----------------------------------+ 2977 // 2978 // Specifications: 2979 // System V Application Binary Interface PowerPC Processor Supplement 2980 // AltiVec Technology Programming Interface Manual 2981 2982 MachineFunction &MF = DAG.getMachineFunction(); 2983 MachineFrameInfo &MFI = MF.getFrameInfo(); 2984 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2985 2986 EVT PtrVT = getPointerTy(MF.getDataLayout()); 2987 // Potential tail calls could cause overwriting of argument stack slots. 2988 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2989 (CallConv == CallingConv::Fast)); 2990 unsigned PtrByteSize = 4; 2991 2992 // Assign locations to all of the incoming arguments. 2993 SmallVector<CCValAssign, 16> ArgLocs; 2994 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 2995 *DAG.getContext()); 2996 2997 // Reserve space for the linkage area on the stack. 2998 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 2999 CCInfo.AllocateStack(LinkageSize, PtrByteSize); 3000 if (useSoftFloat()) 3001 CCInfo.PreAnalyzeFormalArguments(Ins); 3002 3003 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 3004 CCInfo.clearWasPPCF128(); 3005 3006 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 3007 CCValAssign &VA = ArgLocs[i]; 3008 3009 // Arguments stored in registers. 
3010 if (VA.isRegLoc()) { 3011 const TargetRegisterClass *RC; 3012 EVT ValVT = VA.getValVT(); 3013 3014 switch (ValVT.getSimpleVT().SimpleTy) { 3015 default: 3016 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 3017 case MVT::i1: 3018 case MVT::i32: 3019 RC = &PPC::GPRCRegClass; 3020 break; 3021 case MVT::f32: 3022 if (Subtarget.hasP8Vector()) 3023 RC = &PPC::VSSRCRegClass; 3024 else 3025 RC = &PPC::F4RCRegClass; 3026 break; 3027 case MVT::f64: 3028 if (Subtarget.hasVSX()) 3029 RC = &PPC::VSFRCRegClass; 3030 else 3031 RC = &PPC::F8RCRegClass; 3032 break; 3033 case MVT::v16i8: 3034 case MVT::v8i16: 3035 case MVT::v4i32: 3036 RC = &PPC::VRRCRegClass; 3037 break; 3038 case MVT::v4f32: 3039 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; 3040 break; 3041 case MVT::v2f64: 3042 case MVT::v2i64: 3043 RC = &PPC::VRRCRegClass; 3044 break; 3045 case MVT::v4f64: 3046 RC = &PPC::QFRCRegClass; 3047 break; 3048 case MVT::v4i1: 3049 RC = &PPC::QBRCRegClass; 3050 break; 3051 } 3052 3053 // Transform the arguments stored in physical registers into virtual ones. 3054 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 3055 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, 3056 ValVT == MVT::i1 ? MVT::i32 : ValVT); 3057 3058 if (ValVT == MVT::i1) 3059 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); 3060 3061 InVals.push_back(ArgValue); 3062 } else { 3063 // Argument stored in memory. 3064 assert(VA.isMemLoc()); 3065 3066 unsigned ArgSize = VA.getLocVT().getStoreSize(); 3067 int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(), 3068 isImmutable); 3069 3070 // Create load nodes to retrieve arguments from the stack. 3071 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3072 InVals.push_back( 3073 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); 3074 } 3075 } 3076 3077 // Assign locations to all of the incoming aggregate by value arguments. 
3078 // Aggregates passed by value are stored in the local variable space of the 3079 // caller's stack frame, right above the parameter list area. 3080 SmallVector<CCValAssign, 16> ByValArgLocs; 3081 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3082 ByValArgLocs, *DAG.getContext()); 3083 3084 // Reserve stack space for the allocations in CCInfo. 3085 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 3086 3087 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 3088 3089 // Area that is at least reserved in the caller of this function. 3090 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 3091 MinReservedArea = std::max(MinReservedArea, LinkageSize); 3092 3093 // Set the size that is at least reserved in caller of this function. Tail 3094 // call optimized function's reserved stack space needs to be aligned so that 3095 // taking the difference between two stack areas will result in an aligned 3096 // stack. 3097 MinReservedArea = 3098 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3099 FuncInfo->setMinReservedArea(MinReservedArea); 3100 3101 SmallVector<SDValue, 8> MemOps; 3102 3103 // If the function takes variable number of arguments, make a frame index for 3104 // the start of the first vararg value... for expansion of llvm.va_start. 
3105 if (isVarArg) { 3106 static const MCPhysReg GPArgRegs[] = { 3107 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3108 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3109 }; 3110 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 3111 3112 static const MCPhysReg FPArgRegs[] = { 3113 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 3114 PPC::F8 3115 }; 3116 unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 3117 3118 if (useSoftFloat()) 3119 NumFPArgRegs = 0; 3120 3121 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); 3122 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); 3123 3124 // Make room for NumGPArgRegs and NumFPArgRegs. 3125 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 3126 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; 3127 3128 FuncInfo->setVarArgsStackOffset( 3129 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, 3130 CCInfo.getNextStackOffset(), true)); 3131 3132 FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false)); 3133 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3134 3135 // The fixed integer arguments of a variadic function are stored to the 3136 // VarArgsFrameIndex on the stack so that they may be loaded by 3137 // dereferencing the result of va_next. 3138 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 3139 // Get an existing live-in vreg, or add a new one. 
3140 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 3141 if (!VReg) 3142 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 3143 3144 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3145 SDValue Store = 3146 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3147 MemOps.push_back(Store); 3148 // Increment the address by four for the next argument to store 3149 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 3150 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3151 } 3152 3153 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 3154 // is set. 3155 // The double arguments are stored to the VarArgsFrameIndex 3156 // on the stack. 3157 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 3158 // Get an existing live-in vreg, or add a new one. 3159 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 3160 if (!VReg) 3161 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 3162 3163 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 3164 SDValue Store = 3165 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3166 MemOps.push_back(Store); 3167 // Increment the address by eight for the next argument to store 3168 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, 3169 PtrVT); 3170 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3171 } 3172 } 3173 3174 if (!MemOps.empty()) 3175 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3176 3177 return Chain; 3178 } 3179 3180 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3181 // value to MVT::i64 and then truncate to the correct register size. 
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  // Record what is known about the upper bits of the incoming i64 value (per
  // the sext/zext argument flag) before narrowing it to ObjectVT.
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}

/// LowerFormalArguments_64SVR4 - Lower the incoming formal arguments of a
/// function using the 64-bit SVR4 conventions (ELFv1 or ELFv2).
///
/// One SDValue per entry of \p Ins is appended to \p InVals: a copy from the
/// live-in argument register, a load from the argument's fixed stack slot, or
/// (for byval arguments) the address of the frame object holding the
/// aggregate. Stores that spill register-passed pieces to memory are merged
/// into the returned chain with a TokenFactor. As side effects the function
/// records the minimum reserved area and, for vararg functions, the varargs
/// frame index in PPCFunctionInfo.
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = array_lengthof(VR);
  // QPX vector arguments share the FPR allocation (see QFPR_idx below).
  const unsigned Num_QFPR_Regs = Num_FPR_Regs;

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame. In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers. On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  // QFPR_idx aliases FPR_idx: consuming a QPX register also consumes an FPR
  // slot and vice versa.
  unsigned &QFPR_idx = FPR_idx;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    // Keep FuncArg pointing at the IR argument this InputArg was derived from.
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack. */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset. */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers. Examples:
      //   struct { } a;
      //   union { } b;
      //   int c[0];
      // etc. However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument. If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame. Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(&*FuncArg));
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly. Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // With fastcc, stack space is only consumed when the argument is
      // actually passed in memory.
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly. The latter are used to implement ELFv2 homogeneous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Two f32 values share one doubleword; pick the half selected by
          // the argument offset and the target endianness.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array. Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly. The latter are used to implement ELFv2 homogeneous
        // vector aggregates.
        if (VR_idx != Num_VR_Regs) {
          unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++VR_idx;
        } else {
          if (CallConv == CallingConv::Fast)
            ComputeArgOffset();

          needsLoad = true;
        }
        if (CallConv != CallingConv::Fast || needsLoad)
          ArgOffset += 16;
        break;
      } // not QPX

      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      /* fall through */

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // On big-endian targets a value smaller than its slot is located at
      // the high-address end of the slot.
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Advance the address by one doubleword (PtrByteSize) for the next
      // register to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all spill stores together so later users of the chain depend on them.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// LowerFormalArguments_Darwin - Lower the incoming formal arguments of a
/// function using the Darwin conventions, for both 32-bit and 64-bit targets
/// (selected by whether the pointer type is i64).
///
/// One SDValue per entry of \p Ins is appended to \p InVals. Stores that spill
/// register-passed pieces to memory are merged into the returned chain with a
/// TokenFactor. As side effects the function records the minimum reserved
/// area and, for vararg functions, the varargs frame index in PPCFunctionInfo.
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors. We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area. Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at non-vector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is. Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers. On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    // Keep FuncArg pointing at the IR argument this InputArg was derived from.
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified. This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory. ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the remainder is already in memory, so skip past
          // the rest of the object in one step.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          // Pad to a 16-byte boundary, consuming the GPRs that shadow the
          // padding.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // A value smaller than its slot is loaded from the high-address end of
      // the slot (offset by ArgSize - ObjSize).
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Advance the address by the pointer size (4 or 8 bytes) for the next
      // register to store.
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all spill stores together so later users of the chain depend on them.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
3966 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 3967 unsigned ParamSize) { 3968 3969 if (!isTailCall) return 0; 3970 3971 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 3972 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 3973 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 3974 // Remember only if the new adjustement is bigger. 3975 if (SPDiff < FI->getTailCallSPDelta()) 3976 FI->setTailCallSPDelta(SPDiff); 3977 3978 return SPDiff; 3979 } 3980 3981 static bool isFunctionGlobalAddress(SDValue Callee); 3982 3983 static bool 3984 resideInSameModule(SDValue Callee, Reloc::Model RelMod) { 3985 // If !G, Callee can be an external symbol. 3986 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 3987 if (!G) return false; 3988 3989 const GlobalValue *GV = G->getGlobal(); 3990 3991 if (GV->isDeclaration()) return false; 3992 3993 switch(GV->getLinkage()) { 3994 default: llvm_unreachable("unknow linkage type"); 3995 case GlobalValue::AvailableExternallyLinkage: 3996 case GlobalValue::ExternalWeakLinkage: 3997 return false; 3998 3999 // Callee with weak linkage is allowed if it has hidden or protected 4000 // visibility 4001 case GlobalValue::LinkOnceAnyLinkage: 4002 case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions 4003 case GlobalValue::WeakAnyLinkage: 4004 case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation 4005 if (GV->hasDefaultVisibility()) 4006 return false; 4007 4008 case GlobalValue::ExternalLinkage: 4009 case GlobalValue::InternalLinkage: 4010 case GlobalValue::PrivateLinkage: 4011 break; 4012 } 4013 4014 // With '-fPIC', calling default visiblity function need insert 'nop' after 4015 // function call, no matter that function resides in same module or not, so 4016 // we treat it as in different module. 
4017 if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility()) 4018 return false; 4019 4020 return true; 4021 } 4022 4023 static bool 4024 needStackSlotPassParameters(const PPCSubtarget &Subtarget, 4025 const SmallVectorImpl<ISD::OutputArg> &Outs) { 4026 assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); 4027 4028 const unsigned PtrByteSize = 8; 4029 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 4030 4031 static const MCPhysReg GPR[] = { 4032 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4033 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4034 }; 4035 static const MCPhysReg VR[] = { 4036 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4037 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4038 }; 4039 4040 const unsigned NumGPRs = array_lengthof(GPR); 4041 const unsigned NumFPRs = 13; 4042 const unsigned NumVRs = array_lengthof(VR); 4043 const unsigned ParamAreaSize = NumGPRs * PtrByteSize; 4044 4045 unsigned NumBytes = LinkageSize; 4046 unsigned AvailableFPRs = NumFPRs; 4047 unsigned AvailableVRs = NumVRs; 4048 4049 for (const ISD::OutputArg& Param : Outs) { 4050 if (Param.Flags.isNest()) continue; 4051 4052 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, 4053 PtrByteSize, LinkageSize, ParamAreaSize, 4054 NumBytes, AvailableFPRs, AvailableVRs, 4055 Subtarget.hasQPX())) 4056 return true; 4057 } 4058 return false; 4059 } 4060 4061 static bool 4062 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) { 4063 if (CS->arg_size() != CallerFn->getArgumentList().size()) 4064 return false; 4065 4066 ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin(); 4067 ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end(); 4068 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); 4069 4070 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) { 4071 const Value* CalleeArg = *CalleeArgIter; 4072 const Value* CallerArg = &(*CallerArgIter); 4073 if (CalleeArg == CallerArg) 4074 
continue; 4075 4076 // e.g. @caller([4 x i64] %a, [4 x i64] %b) { 4077 // tail call @callee([4 x i64] undef, [4 x i64] %b) 4078 // } 4079 // 1st argument of callee is undef and has the same type as caller. 4080 if (CalleeArg->getType() == CallerArg->getType() && 4081 isa<UndefValue>(CalleeArg)) 4082 continue; 4083 4084 return false; 4085 } 4086 4087 return true; 4088 } 4089 4090 bool 4091 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( 4092 SDValue Callee, 4093 CallingConv::ID CalleeCC, 4094 ImmutableCallSite *CS, 4095 bool isVarArg, 4096 const SmallVectorImpl<ISD::OutputArg> &Outs, 4097 const SmallVectorImpl<ISD::InputArg> &Ins, 4098 SelectionDAG& DAG) const { 4099 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; 4100 4101 if (DisableSCO && !TailCallOpt) return false; 4102 4103 // Variadic argument functions are not supported. 4104 if (isVarArg) return false; 4105 4106 MachineFunction &MF = DAG.getMachineFunction(); 4107 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 4108 4109 // Tail or Sibling call optimization (TCO/SCO) needs callee and caller has 4110 // the same calling convention 4111 if (CallerCC != CalleeCC) return false; 4112 4113 // SCO support C calling convention 4114 if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C) 4115 return false; 4116 4117 // Caller contains any byval parameter is not supported. 4118 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); })) 4119 return false; 4120 4121 // Callee contains any byval parameter is not supported, too. 4122 // Note: This is a quick work around, because in some cases, e.g. 4123 // caller's stack size > callee's stack size, we are still able to apply 4124 // sibling call optimization. 
See: https://reviews.llvm.org/D23441#513574 4125 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); })) 4126 return false; 4127 4128 // No TCO/SCO on indirect call because Caller have to restore its TOC 4129 if (!isFunctionGlobalAddress(Callee) && 4130 !isa<ExternalSymbolSDNode>(Callee)) 4131 return false; 4132 4133 // Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI 4134 // (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another 4135 // module. 4136 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 4137 if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel())) 4138 return false; 4139 4140 // TCO allows altering callee ABI, so we don't have to check further. 4141 if (CalleeCC == CallingConv::Fast && TailCallOpt) 4142 return true; 4143 4144 if (DisableSCO) return false; 4145 4146 // If callee use the same argument list that caller is using, then we can 4147 // apply SCO on this case. If it is not, then we need to check if callee needs 4148 // stack for passing arguments. 4149 if (!hasSameArgumentList(MF.getFunction(), CS) && 4150 needStackSlotPassParameters(Subtarget, Outs)) { 4151 return false; 4152 } 4153 4154 return true; 4155 } 4156 4157 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 4158 /// for tail call optimization. Targets which want to do tail call 4159 /// optimization should implement this function. 4160 bool 4161 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 4162 CallingConv::ID CalleeCC, 4163 bool isVarArg, 4164 const SmallVectorImpl<ISD::InputArg> &Ins, 4165 SelectionDAG& DAG) const { 4166 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 4167 return false; 4168 4169 // Variable argument functions are not supported. 
4170 if (isVarArg) 4171 return false; 4172 4173 MachineFunction &MF = DAG.getMachineFunction(); 4174 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 4175 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 4176 // Functions containing by val parameters are not supported. 4177 for (unsigned i = 0; i != Ins.size(); i++) { 4178 ISD::ArgFlagsTy Flags = Ins[i].Flags; 4179 if (Flags.isByVal()) return false; 4180 } 4181 4182 // Non-PIC/GOT tail calls are supported. 4183 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 4184 return true; 4185 4186 // At the moment we can only do local tail calls (in same module, hidden 4187 // or protected) if we are generating PIC. 4188 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4189 return G->getGlobal()->hasHiddenVisibility() 4190 || G->getGlobal()->hasProtectedVisibility(); 4191 } 4192 4193 return false; 4194 } 4195 4196 /// isCallCompatibleAddress - Return the immediate to use if the specified 4197 /// 32-bit value is representable in the immediate field of a BxA instruction. 4198 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 4199 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 4200 if (!C) return nullptr; 4201 4202 int Addr = C->getZExtValue(); 4203 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 4204 SignExtend32<26>(Addr) != Addr) 4205 return nullptr; // Top 6 bits have to be sext of immediate. 4206 4207 return DAG 4208 .getConstant( 4209 (int)C->getZExtValue() >> 2, SDLoc(Op), 4210 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) 4211 .getNode(); 4212 } 4213 4214 namespace { 4215 4216 struct TailCallArgumentInfo { 4217 SDValue Arg; 4218 SDValue FrameIdxOp; 4219 int FrameIdx; 4220 4221 TailCallArgumentInfo() : FrameIdx(0) {} 4222 }; 4223 } 4224 4225 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 
4226 static void StoreTailCallArgumentsToStackSlot( 4227 SelectionDAG &DAG, SDValue Chain, 4228 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 4229 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) { 4230 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 4231 SDValue Arg = TailCallArgs[i].Arg; 4232 SDValue FIN = TailCallArgs[i].FrameIdxOp; 4233 int FI = TailCallArgs[i].FrameIdx; 4234 // Store relative to framepointer. 4235 MemOpChains.push_back(DAG.getStore( 4236 Chain, dl, Arg, FIN, 4237 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); 4238 } 4239 } 4240 4241 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 4242 /// the appropriate stack slot for the tail call optimized function call. 4243 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, 4244 SDValue OldRetAddr, SDValue OldFP, 4245 int SPDiff, const SDLoc &dl) { 4246 if (SPDiff) { 4247 // Calculate the new stack slot for the return address. 4248 MachineFunction &MF = DAG.getMachineFunction(); 4249 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); 4250 const PPCFrameLowering *FL = Subtarget.getFrameLowering(); 4251 bool isPPC64 = Subtarget.isPPC64(); 4252 int SlotSize = isPPC64 ? 8 : 4; 4253 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); 4254 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize, 4255 NewRetAddrLoc, true); 4256 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 4257 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 4258 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 4259 MachinePointerInfo::getFixedStack(MF, NewRetAddr)); 4260 4261 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 4262 // slot as the FP is never overwritten. 
4263 if (Subtarget.isDarwinABI()) { 4264 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); 4265 int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc, 4266 true); 4267 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 4268 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 4269 MachinePointerInfo::getFixedStack( 4270 DAG.getMachineFunction(), NewFPIdx)); 4271 } 4272 } 4273 return Chain; 4274 } 4275 4276 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 4277 /// the position of the argument. 4278 static void 4279 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 4280 SDValue Arg, int SPDiff, unsigned ArgOffset, 4281 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 4282 int Offset = ArgOffset + SPDiff; 4283 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8; 4284 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); 4285 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 4286 SDValue FIN = DAG.getFrameIndex(FI, VT); 4287 TailCallArgumentInfo Info; 4288 Info.Arg = Arg; 4289 Info.FrameIdxOp = FIN; 4290 Info.FrameIdx = FI; 4291 TailCallArguments.push_back(Info); 4292 } 4293 4294 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 4295 /// stack slot. Returns the chain as result and the loaded frame pointers in 4296 /// LROpOut/FPOpout. Used when tail calling. 4297 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( 4298 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, 4299 SDValue &FPOpOut, const SDLoc &dl) const { 4300 if (SPDiff) { 4301 // Load the LR and FP stack slot for later adjusting. 4302 EVT VT = Subtarget.isPPC64() ? 
MVT::i64 : MVT::i32; 4303 LROpOut = getReturnAddrFrameIndex(DAG); 4304 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); 4305 Chain = SDValue(LROpOut.getNode(), 1); 4306 4307 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 4308 // slot as the FP is never overwritten. 4309 if (Subtarget.isDarwinABI()) { 4310 FPOpOut = getFramePointerFrameIndex(DAG); 4311 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); 4312 Chain = SDValue(FPOpOut.getNode(), 1); 4313 } 4314 } 4315 return Chain; 4316 } 4317 4318 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 4319 /// by "Src" to address "Dst" of size "Size". Alignment information is 4320 /// specified by the specific parameter attribute. The copy will be passed as 4321 /// a byval function parameter. 4322 /// Sometimes what we are copying is the end of a larger object, the part that 4323 /// does not fit in registers. 4324 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, 4325 SDValue Chain, ISD::ArgFlagsTy Flags, 4326 SelectionDAG &DAG, const SDLoc &dl) { 4327 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); 4328 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 4329 false, false, false, MachinePointerInfo(), 4330 MachinePointerInfo()); 4331 } 4332 4333 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 4334 /// tail calls. 
4335 static void LowerMemOpCallTo( 4336 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, 4337 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, 4338 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 4339 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) { 4340 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4341 if (!isTailCall) { 4342 if (isVector) { 4343 SDValue StackPtr; 4344 if (isPPC64) 4345 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4346 else 4347 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4348 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4349 DAG.getConstant(ArgOffset, dl, PtrVT)); 4350 } 4351 MemOpChains.push_back( 4352 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 4353 // Calculate and remember argument location. 4354 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 4355 TailCallArguments); 4356 } 4357 4358 static void 4359 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 4360 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, 4361 SDValue FPOp, 4362 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 4363 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 4364 // might overwrite each other in case of tail call optimization. 4365 SmallVector<SDValue, 8> MemOpChains2; 4366 // Do not flag preceding copytoreg stuff together with the following stuff. 4367 InFlag = SDValue(); 4368 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 4369 MemOpChains2, dl); 4370 if (!MemOpChains2.empty()) 4371 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 4372 4373 // Store the return address to the appropriate stack slot. 4374 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); 4375 4376 // Emit callseq_end just before tailcall node. 
4377 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4378 DAG.getIntPtrConstant(0, dl, true), InFlag, dl); 4379 InFlag = Chain.getValue(1); 4380 } 4381 4382 // Is this global address that of a function that can be called by name? (as 4383 // opposed to something that must hold a descriptor for an indirect call). 4384 static bool isFunctionGlobalAddress(SDValue Callee) { 4385 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4386 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 4387 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 4388 return false; 4389 4390 return G->getGlobal()->getValueType()->isFunctionTy(); 4391 } 4392 4393 return false; 4394 } 4395 4396 static unsigned 4397 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, 4398 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, 4399 bool isPatchPoint, bool hasNest, 4400 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, 4401 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 4402 ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { 4403 4404 bool isPPC64 = Subtarget.isPPC64(); 4405 bool isSVR4ABI = Subtarget.isSVR4ABI(); 4406 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4407 4408 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4409 NodeTys.push_back(MVT::Other); // Returns a chain 4410 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 4411 4412 unsigned CallOpc = PPCISD::CALL; 4413 4414 bool needIndirectCall = true; 4415 if (!isSVR4ABI || !isPPC64) 4416 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 4417 // If this is an absolute destination address, use the munged value. 4418 Callee = SDValue(Dest, 0); 4419 needIndirectCall = false; 4420 } 4421 4422 // PC-relative references to external symbols should go through $stub, unless 4423 // we're building with the leopard linker or later, which automatically 4424 // synthesizes these stubs. 
4425 const TargetMachine &TM = DAG.getTarget(); 4426 const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); 4427 const GlobalValue *GV = nullptr; 4428 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4429 GV = G->getGlobal(); 4430 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); 4431 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; 4432 4433 if (isFunctionGlobalAddress(Callee)) { 4434 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 4435 // A call to a TLS address is actually an indirect call to a 4436 // thread-specific pointer. 4437 unsigned OpFlags = 0; 4438 if (UsePlt) 4439 OpFlags = PPCII::MO_PLT; 4440 4441 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 4442 // every direct call is) turn it into a TargetGlobalAddress / 4443 // TargetExternalSymbol node so that legalize doesn't hack it. 4444 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 4445 Callee.getValueType(), 0, OpFlags); 4446 needIndirectCall = false; 4447 } 4448 4449 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4450 unsigned char OpFlags = 0; 4451 4452 if (UsePlt) 4453 OpFlags = PPCII::MO_PLT; 4454 4455 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 4456 OpFlags); 4457 needIndirectCall = false; 4458 } 4459 4460 if (isPatchPoint) { 4461 // We'll form an invalid direct call when lowering a patchpoint; the full 4462 // sequence for an indirect call is complicated, and many of the 4463 // instructions introduced might have side effects (and, thus, can't be 4464 // removed later). The call itself will be removed as soon as the 4465 // argument/return lowering is complete, so the fact that it has the wrong 4466 // kind of operands should not really matter. 4467 needIndirectCall = false; 4468 } 4469 4470 if (needIndirectCall) { 4471 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 4472 // to do the call, we can't use PPCISD::CALL. 
4473 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 4474 4475 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 4476 // Function pointers in the 64-bit SVR4 ABI do not point to the function 4477 // entry point, but to the function descriptor (the function entry point 4478 // address is part of the function descriptor though). 4479 // The function descriptor is a three doubleword structure with the 4480 // following fields: function entry point, TOC base address and 4481 // environment pointer. 4482 // Thus for a call through a function pointer, the following actions need 4483 // to be performed: 4484 // 1. Save the TOC of the caller in the TOC save area of its stack 4485 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 4486 // 2. Load the address of the function entry point from the function 4487 // descriptor. 4488 // 3. Load the TOC of the callee from the function descriptor into r2. 4489 // 4. Load the environment pointer from the function descriptor into 4490 // r11. 4491 // 5. Branch to the function entry point address. 4492 // 6. On return of the callee, the TOC of the caller needs to be 4493 // restored (this is done in FinishCall()). 4494 // 4495 // The loads are scheduled at the beginning of the call sequence, and the 4496 // register copies are flagged together to ensure that no other 4497 // operations can be scheduled in between. E.g. without flagging the 4498 // copies together, a TOC access in the caller could be scheduled between 4499 // the assignment of the callee TOC and the branch to the callee, which 4500 // results in the TOC access going through the TOC of the callee instead 4501 // of going through the TOC of the caller, which leads to incorrect code. 4502 4503 // Load the address of the function entry point from the function 4504 // descriptor. 
4505 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); 4506 if (LDChain.getValueType() == MVT::Glue) 4507 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); 4508 4509 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() 4510 ? (MachineMemOperand::MODereferenceable | 4511 MachineMemOperand::MOInvariant) 4512 : MachineMemOperand::MONone; 4513 4514 MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr); 4515 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, 4516 /* Alignment = */ 8, MMOFlags); 4517 4518 // Load environment pointer into r11. 4519 SDValue PtrOff = DAG.getIntPtrConstant(16, dl); 4520 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 4521 SDValue LoadEnvPtr = 4522 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), 4523 /* Alignment = */ 8, MMOFlags); 4524 4525 SDValue TOCOff = DAG.getIntPtrConstant(8, dl); 4526 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 4527 SDValue TOCPtr = 4528 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), 4529 /* Alignment = */ 8, MMOFlags); 4530 4531 setUsesTOCBasePtr(DAG); 4532 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, 4533 InFlag); 4534 Chain = TOCVal.getValue(0); 4535 InFlag = TOCVal.getValue(1); 4536 4537 // If the function call has an explicit 'nest' parameter, it takes the 4538 // place of the environment pointer. 4539 if (!hasNest) { 4540 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 4541 InFlag); 4542 4543 Chain = EnvVal.getValue(0); 4544 InFlag = EnvVal.getValue(1); 4545 } 4546 4547 MTCTROps[0] = Chain; 4548 MTCTROps[1] = LoadFuncPtr; 4549 MTCTROps[2] = InFlag; 4550 } 4551 4552 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 4553 makeArrayRef(MTCTROps, InFlag.getNode() ? 
3 : 2)); 4554 InFlag = Chain.getValue(1); 4555 4556 NodeTys.clear(); 4557 NodeTys.push_back(MVT::Other); 4558 NodeTys.push_back(MVT::Glue); 4559 Ops.push_back(Chain); 4560 CallOpc = PPCISD::BCTRL; 4561 Callee.setNode(nullptr); 4562 // Add use of X11 (holding environment pointer) 4563 if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) 4564 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 4565 // Add CTR register as callee so a bctr can be emitted later. 4566 if (isTailCall) 4567 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 4568 } 4569 4570 // If this is a direct call, pass the chain and the callee. 4571 if (Callee.getNode()) { 4572 Ops.push_back(Chain); 4573 Ops.push_back(Callee); 4574 } 4575 // If this is a tail call add stack pointer delta. 4576 if (isTailCall) 4577 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); 4578 4579 // Add argument registers to the end of the list so that they are known live 4580 // into the call. 4581 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 4582 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 4583 RegsToPass[i].second.getValueType())); 4584 4585 // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live 4586 // into the call. 
4587 if (isSVR4ABI && isPPC64 && !isPatchPoint) { 4588 setUsesTOCBasePtr(DAG); 4589 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 4590 } 4591 4592 return CallOpc; 4593 } 4594 4595 static 4596 bool isLocalCall(const SDValue &Callee) 4597 { 4598 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4599 return G->getGlobal()->isStrongDefinitionForLinker(); 4600 return false; 4601 } 4602 4603 SDValue PPCTargetLowering::LowerCallResult( 4604 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, 4605 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 4606 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 4607 4608 SmallVector<CCValAssign, 16> RVLocs; 4609 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 4610 *DAG.getContext()); 4611 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 4612 4613 // Copy all of the result registers out of their specified physreg. 4614 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 4615 CCValAssign &VA = RVLocs[i]; 4616 assert(VA.isRegLoc() && "Can only return in registers!"); 4617 4618 SDValue Val = DAG.getCopyFromReg(Chain, dl, 4619 VA.getLocReg(), VA.getLocVT(), InFlag); 4620 Chain = Val.getValue(1); 4621 InFlag = Val.getValue(2); 4622 4623 switch (VA.getLocInfo()) { 4624 default: llvm_unreachable("Unknown loc info!"); 4625 case CCValAssign::Full: break; 4626 case CCValAssign::AExt: 4627 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4628 break; 4629 case CCValAssign::ZExt: 4630 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 4631 DAG.getValueType(VA.getValVT())); 4632 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4633 break; 4634 case CCValAssign::SExt: 4635 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 4636 DAG.getValueType(VA.getValVT())); 4637 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4638 break; 4639 } 4640 4641 InVals.push_back(Val); 4642 } 4643 4644 return Chain; 4645 } 4646 4647 SDValue 
PPCTargetLowering::FinishCall( 4648 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, 4649 bool isPatchPoint, bool hasNest, SelectionDAG &DAG, 4650 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, 4651 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, 4652 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, 4653 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { 4654 4655 std::vector<EVT> NodeTys; 4656 SmallVector<SDValue, 8> Ops; 4657 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, 4658 SPDiff, isTailCall, isPatchPoint, hasNest, 4659 RegsToPass, Ops, NodeTys, CS, Subtarget); 4660 4661 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 4662 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 4663 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 4664 4665 // When performing tail call optimization the callee pops its arguments off 4666 // the stack. Account for this here so these bytes can be pushed back on in 4667 // PPCFrameLowering::eliminateCallFramePseudoInstr. 4668 int BytesCalleePops = 4669 (CallConv == CallingConv::Fast && 4670 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 4671 4672 // Add a register mask operand representing the call-preserved registers. 4673 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4674 const uint32_t *Mask = 4675 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); 4676 assert(Mask && "Missing call preserved mask for calling convention"); 4677 Ops.push_back(DAG.getRegisterMask(Mask)); 4678 4679 if (InFlag.getNode()) 4680 Ops.push_back(InFlag); 4681 4682 // Emit tail call. 
4683 if (isTailCall) { 4684 assert(((Callee.getOpcode() == ISD::Register && 4685 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 4686 Callee.getOpcode() == ISD::TargetExternalSymbol || 4687 Callee.getOpcode() == ISD::TargetGlobalAddress || 4688 isa<ConstantSDNode>(Callee)) && 4689 "Expecting an global address, external symbol, absolute value or register"); 4690 4691 DAG.getMachineFunction().getFrameInfo().setHasTailCall(); 4692 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); 4693 } 4694 4695 // Add a NOP immediately after the branch instruction when using the 64-bit 4696 // SVR4 ABI. At link time, if caller and callee are in a different module and 4697 // thus have a different TOC, the call will be replaced with a call to a stub 4698 // function which saves the current TOC, loads the TOC of the callee and 4699 // branches to the callee. The NOP will be replaced with a load instruction 4700 // which restores the TOC of the caller from the TOC save slot of the current 4701 // stack frame. If caller and callee belong to the same module (and have the 4702 // same TOC), the NOP will remain unchanged. 4703 4704 if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && 4705 !isPatchPoint) { 4706 if (CallOpc == PPCISD::BCTRL) { 4707 // This is a call through a function pointer. 4708 // Restore the caller TOC from the save area into R2. 4709 // See PrepareCall() for more information about calls through function 4710 // pointers in the 64-bit SVR4 ABI. 4711 // We are using a target-specific load with r2 hard coded, because the 4712 // result of a target-independent load would never go directly into r2, 4713 // since r2 is a reserved register (which prevents the register allocator 4714 // from allocating it), resulting in an additional register being 4715 // allocated and an unnecessary move instruction being generated. 
4716 CallOpc = PPCISD::BCTRL_LOAD_TOC; 4717 4718 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4719 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); 4720 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 4721 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 4722 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); 4723 4724 // The address needs to go after the chain input but before the flag (or 4725 // any other variadic arguments). 4726 Ops.insert(std::next(Ops.begin()), AddTOC); 4727 } else if ((CallOpc == PPCISD::CALL) && 4728 (!isLocalCall(Callee) || 4729 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) 4730 // Otherwise insert NOP for non-local calls. 4731 CallOpc = PPCISD::CALL_NOP; 4732 } 4733 4734 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 4735 InFlag = Chain.getValue(1); 4736 4737 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4738 DAG.getIntPtrConstant(BytesCalleePops, dl, true), 4739 InFlag, dl); 4740 if (!Ins.empty()) 4741 InFlag = Chain.getValue(1); 4742 4743 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 4744 Ins, dl, DAG, InVals); 4745 } 4746 4747 SDValue 4748 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 4749 SmallVectorImpl<SDValue> &InVals) const { 4750 SelectionDAG &DAG = CLI.DAG; 4751 SDLoc &dl = CLI.DL; 4752 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4753 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4754 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4755 SDValue Chain = CLI.Chain; 4756 SDValue Callee = CLI.Callee; 4757 bool &isTailCall = CLI.IsTailCall; 4758 CallingConv::ID CallConv = CLI.CallConv; 4759 bool isVarArg = CLI.IsVarArg; 4760 bool isPatchPoint = CLI.IsPatchPoint; 4761 ImmutableCallSite *CS = CLI.CS; 4762 4763 if (isTailCall) { 4764 if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall())) 4765 isTailCall = false; 4766 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) 4767 
isTailCall = 4768 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, 4769 isVarArg, Outs, Ins, DAG); 4770 else 4771 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 4772 Ins, DAG); 4773 if (isTailCall) { 4774 ++NumTailCalls; 4775 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 4776 ++NumSiblingCalls; 4777 4778 assert(isa<GlobalAddressSDNode>(Callee) && 4779 "Callee should be an llvm::Function object."); 4780 DEBUG( 4781 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); 4782 const unsigned Width = 80 - strlen("TCO caller: ") 4783 - strlen(", callee linkage: 0, 0"); 4784 dbgs() << "TCO caller: " 4785 << left_justify(DAG.getMachineFunction().getName(), Width) 4786 << ", callee linkage: " 4787 << GV->getVisibility() << ", " << GV->getLinkage() << "\n" 4788 ); 4789 } 4790 } 4791 4792 if (!isTailCall && CS && CS->isMustTailCall()) 4793 report_fatal_error("failed to perform tail call elimination on a call " 4794 "site marked musttail"); 4795 4796 // When long calls (i.e. indirect calls) are always used, calls are always 4797 // made via function pointer. If we have a function name, first translate it 4798 // into a pointer. 
4799 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) && 4800 !isTailCall) 4801 Callee = LowerGlobalAddress(Callee, DAG); 4802 4803 if (Subtarget.isSVR4ABI()) { 4804 if (Subtarget.isPPC64()) 4805 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 4806 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4807 dl, DAG, InVals, CS); 4808 else 4809 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 4810 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4811 dl, DAG, InVals, CS); 4812 } 4813 4814 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 4815 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4816 dl, DAG, InVals, CS); 4817 } 4818 4819 SDValue PPCTargetLowering::LowerCall_32SVR4( 4820 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 4821 bool isTailCall, bool isPatchPoint, 4822 const SmallVectorImpl<ISD::OutputArg> &Outs, 4823 const SmallVectorImpl<SDValue> &OutVals, 4824 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 4825 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 4826 ImmutableCallSite *CS) const { 4827 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 4828 // of the 32-bit SVR4 ABI stack frame layout. 4829 4830 assert((CallConv == CallingConv::C || 4831 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 4832 4833 unsigned PtrByteSize = 4; 4834 4835 MachineFunction &MF = DAG.getMachineFunction(); 4836 4837 // Mark this function as potentially containing a function that contains a 4838 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4839 // and restoring the callers stack pointer in this functions epilog. This is 4840 // done because by tail calling the called function might overwrite the value 4841 // in this function's (MF) stack pointer stack slot 0(SP). 
4842 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4843 CallConv == CallingConv::Fast) 4844 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4845 4846 // Count how many bytes are to be pushed on the stack, including the linkage 4847 // area, parameter list area and the part of the local variable space which 4848 // contains copies of aggregates which are passed by value. 4849 4850 // Assign locations to all of the outgoing arguments. 4851 SmallVector<CCValAssign, 16> ArgLocs; 4852 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); 4853 4854 // Reserve space for the linkage area on the stack. 4855 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), 4856 PtrByteSize); 4857 if (useSoftFloat()) 4858 CCInfo.PreAnalyzeCallOperands(Outs); 4859 4860 if (isVarArg) { 4861 // Handle fixed and variable vector arguments differently. 4862 // Fixed vector arguments go into registers as long as registers are 4863 // available. Variable vector arguments always go into memory. 4864 unsigned NumArgs = Outs.size(); 4865 4866 for (unsigned i = 0; i != NumArgs; ++i) { 4867 MVT ArgVT = Outs[i].VT; 4868 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4869 bool Result; 4870 4871 if (Outs[i].IsFixed) { 4872 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 4873 CCInfo); 4874 } else { 4875 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 4876 ArgFlags, CCInfo); 4877 } 4878 4879 if (Result) { 4880 #ifndef NDEBUG 4881 errs() << "Call operand #" << i << " has unhandled type " 4882 << EVT(ArgVT).getEVTString() << "\n"; 4883 #endif 4884 llvm_unreachable(nullptr); 4885 } 4886 } 4887 } else { 4888 // All arguments are treated the same. 4889 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 4890 } 4891 CCInfo.clearWasPPCF128(); 4892 4893 // Assign locations to all of the outgoing aggregate by value arguments. 
4894 SmallVector<CCValAssign, 16> ByValArgLocs; 4895 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext()); 4896 4897 // Reserve stack space for the allocations in CCInfo. 4898 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 4899 4900 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 4901 4902 // Size of the linkage area, parameter list area and the part of the local 4903 // space variable where copies of aggregates which are passed by value are 4904 // stored. 4905 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 4906 4907 // Calculate by how many bytes the stack has to be adjusted in case of tail 4908 // call optimization. 4909 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4910 4911 // Adjust the stack pointer for the new arguments... 4912 // These operations are automatically eliminated by the prolog/epilog pass 4913 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4914 dl); 4915 SDValue CallSeqStart = Chain; 4916 4917 // Load the return address and frame pointer so it can be moved somewhere else 4918 // later. 4919 SDValue LROp, FPOp; 4920 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 4921 4922 // Set up a copy of the stack pointer for use loading and storing any 4923 // arguments that may not fit in the registers available for argument 4924 // passing. 4925 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4926 4927 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4928 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4929 SmallVector<SDValue, 8> MemOpChains; 4930 4931 bool seenFloatArg = false; 4932 // Walk the register/memloc assignments, inserting copies/loads. 
4933 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 4934 i != e; 4935 ++i) { 4936 CCValAssign &VA = ArgLocs[i]; 4937 SDValue Arg = OutVals[i]; 4938 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4939 4940 if (Flags.isByVal()) { 4941 // Argument is an aggregate which is passed by value, thus we need to 4942 // create a copy of it in the local variable space of the current stack 4943 // frame (which is the stack frame of the caller) and pass the address of 4944 // this copy to the callee. 4945 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 4946 CCValAssign &ByValVA = ByValArgLocs[j++]; 4947 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 4948 4949 // Memory reserved in the local variable space of the callers stack frame. 4950 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 4951 4952 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4953 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4954 StackPtr, PtrOff); 4955 4956 // Create a copy of the argument in the local area of the current 4957 // stack frame. 4958 SDValue MemcpyCall = 4959 CreateCopyOfByValArgument(Arg, PtrOff, 4960 CallSeqStart.getNode()->getOperand(0), 4961 Flags, DAG, dl); 4962 4963 // This must go outside the CALLSEQ_START..END. 4964 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4965 CallSeqStart.getNode()->getOperand(1), 4966 SDLoc(MemcpyCall)); 4967 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4968 NewCallSeqStart.getNode()); 4969 Chain = CallSeqStart = NewCallSeqStart; 4970 4971 // Pass the address of the aggregate copy on the stack either in a 4972 // physical register or in the parameter list area of the current stack 4973 // frame to the callee. 4974 Arg = PtrOff; 4975 } 4976 4977 if (VA.isRegLoc()) { 4978 if (Arg.getValueType() == MVT::i1) 4979 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 4980 4981 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 4982 // Put argument in a physical register. 
4983 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 4984 } else { 4985 // Put argument in the parameter list area of the current stack frame. 4986 assert(VA.isMemLoc()); 4987 unsigned LocMemOffset = VA.getLocMemOffset(); 4988 4989 if (!isTailCall) { 4990 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4991 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4992 StackPtr, PtrOff); 4993 4994 MemOpChains.push_back( 4995 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 4996 } else { 4997 // Calculate and remember argument location. 4998 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 4999 TailCallArguments); 5000 } 5001 } 5002 } 5003 5004 if (!MemOpChains.empty()) 5005 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5006 5007 // Build a sequence of copy-to-reg nodes chained together with token chain 5008 // and flag operands which copy the outgoing args into the appropriate regs. 5009 SDValue InFlag; 5010 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5011 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5012 RegsToPass[i].second, InFlag); 5013 InFlag = Chain.getValue(1); 5014 } 5015 5016 // Set CR bit 6 to true if this is a vararg call with floating args passed in 5017 // registers. 5018 if (isVarArg) { 5019 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 5020 SDValue Ops[] = { Chain, InFlag }; 5021 5022 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 5023 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 
2 : 1)); 5024 5025 InFlag = Chain.getValue(1); 5026 } 5027 5028 if (isTailCall) 5029 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 5030 TailCallArguments); 5031 5032 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 5033 /* unused except on PPC64 ELFv1 */ false, DAG, 5034 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 5035 NumBytes, Ins, InVals, CS); 5036 } 5037 5038 // Copy an argument into memory, being careful to do this outside the 5039 // call sequence for the call to which the argument belongs. 5040 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( 5041 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, 5042 SelectionDAG &DAG, const SDLoc &dl) const { 5043 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 5044 CallSeqStart.getNode()->getOperand(0), 5045 Flags, DAG, dl); 5046 // The MEMCPY must go outside the CALLSEQ_START..END. 5047 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 5048 CallSeqStart.getNode()->getOperand(1), 5049 SDLoc(MemcpyCall)); 5050 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 5051 NewCallSeqStart.getNode()); 5052 return NewCallSeqStart; 5053 } 5054 5055 SDValue PPCTargetLowering::LowerCall_64SVR4( 5056 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 5057 bool isTailCall, bool isPatchPoint, 5058 const SmallVectorImpl<ISD::OutputArg> &Outs, 5059 const SmallVectorImpl<SDValue> &OutVals, 5060 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5061 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 5062 ImmutableCallSite *CS) const { 5063 5064 bool isELFv2ABI = Subtarget.isELFv2ABI(); 5065 bool isLittleEndian = Subtarget.isLittleEndian(); 5066 unsigned NumOps = Outs.size(); 5067 bool hasNest = false; 5068 bool IsSibCall = false; 5069 5070 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5071 unsigned PtrByteSize = 8; 5072 5073 MachineFunction &MF = DAG.getMachineFunction(); 5074 5075 if (isTailCall && 
!getTargetMachine().Options.GuaranteedTailCallOpt)
    // A tail call without guaranteed tail-call optimization is handled as a
    // sibling call: no CALLSEQ_START, no SPDiff, no PrepareTailCall below.
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the caller's stack pointer in this function's epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  // QFPR_idx aliases FPR_idx, so QPX vector arguments and scalar
  // floating-point arguments advance the same counter.
  unsigned &QFPR_idx = FPR_idx;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned NumQFPRs = NumFPRs;

  // When using the fast calling convention, we don't provide backing for
  // arguments that will be in registers.
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // The 'nest' argument takes no space in the parameter area.
    if (Flags.isNest())
      continue;

    if (CallConv == CallingConv::Fast) {
      // For fastcc, an argument that still fits in a register of its class
      // is skipped (`continue`) and contributes no stack space.
      if (Flags.isByVal())
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
      else
        switch (ArgVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unexpected ValueType for argument!");
        case MVT::i1:
        case MVT::i32:
        case MVT::i64:
          if (++NumGPRsUsed <= NumGPRs)
            continue;
          break;
        case MVT::v4i32:
        case MVT::v8i16:
        case MVT::v16i8:
        case MVT::v2f64:
        case MVT::v2i64:
        case MVT::v1i128:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::v4f32:
          // When using QPX, this is handled like a FP register, otherwise, it
          // is an Altivec register.
          if (Subtarget.hasQPX()) {
            if (++NumFPRsUsed <= NumFPRs)
              continue;
          } else {
            if (++NumVRsUsed <= NumVRs)
              continue;
          }
          break;
        case MVT::f32:
        case MVT::f64:
        case MVT::v4f64: // QPX
        case MVT::v4i1:  // QPX
          if (++NumFPRsUsed <= NumFPRs)
            continue;
          break;
        }
    }

    /* Respect alignment of argument on the stack.  */
    unsigned Align =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = ((NumBytes + Align - 1) / Align) * Align;

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    // Round up to a whole doubleword after the last element of a
    // consecutive-register group.
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }

  // Remembered so the argument-assignment loop below can be checked against
  // this size computation (see the assert after that loop).
  unsigned NumBytesActuallyUsed = NumBytes;

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  int SPDiff = 0;

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  if (!IsSibCall)
    SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getIntPtrConstant(NumBytes, dl, true), dl);
  // For a sibling call no CALLSEQ_START was created, so this is simply the
  // incoming chain.
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      unsigned Align =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;

      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (CallConv != CallingConv::Fast) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (CallConv == CallingConv::Fast)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      // Sizes 1, 2 and 4 can be loaded directly with an extending load when
      // a GPR is still free.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      // No GPR left: copy the small aggregate into its stack slot; on
      // big-endian targets the destination is offset so the data ends at the
      // end of the doubleword.
      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load =
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      // NOTE(review): the loads below read from the original object (Arg + j),
      // not from PtrOff — confirm against the comment above.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: account for the remaining bytes, rounded up to whole
          // doublewords, and stop.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
        hasNest = true;
        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        // fastcc only consumes stack space when the argument went to memory.
        if (CallConv == CallingConv::Fast)
          ArgOffset += PtrByteSize;
      }
      if (CallConv != CallingConv::Fast)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
      if (!Subtarget.hasQPX()) {
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          // NOTE(review): the reload always uses MVT::v4f32, whichever of the
          // vector case types Arg actually has — confirm this is intended.
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Also load the stored value doubleword-by-doubleword into any GPRs
        // that remain. NOTE(review): this `i` shadows the argument index `i`
        // of the enclosing loop.
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += 16;
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += 16;
      break;
      } // not QPX

      // With QPX, only v4f32 from the case list above is expected here; it is
      // handled together with the QPX types below.
      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");

      /* fall through */
    case MVT::v4f64:
    case MVT::v4i1: {
      // v4f32 occupies 16 bytes of parameter area; the other QPX types here
      // are given 32 bytes.
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (QFPR_idx != NumQFPRs) {
          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
                                     PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
        }
        ArgOffset += (IsF32 ? 16 : 32);
        // NOTE(review): this `i` shadows the argument index `i` of the
        // enclosing loop.
        for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs QPX params go into registers or on the stack.
      if (QFPR_idx != NumQFPRs) {
        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += (IsF32 ? 16 : 32);
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += (IsF32 ? 16 : 32);
      break;
      }
    }
  }

  // The assignment loop must have consumed exactly the space computed by the
  // sizing loop above (before the 8-GPR minimum and alignment were applied).
  assert(NumBytesActuallyUsed == ArgOffset);
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
  if (!isTailCall && !isPatchPoint &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    // Load r2 into a virtual register and store it to the TOC save area.
    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
    // TOC save area offset.
    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    // NOTE(review): !isPatchPoint is already guaranteed by the enclosing
    // condition.
    if (isELFv2ABI && !isPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall && !IsSibCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
                    DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
                    SPDiff, NumBytes, Ins, InVals, CS);
}

SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {

  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
5691 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5692 CallConv == CallingConv::Fast) 5693 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5694 5695 // Count how many bytes are to be pushed on the stack, including the linkage 5696 // area, and parameter passing area. We start with 24/48 bytes, which is 5697 // prereserved space for [SP][CR][LR][3 x unused]. 5698 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 5699 unsigned NumBytes = LinkageSize; 5700 5701 // Add up all the space actually used. 5702 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 5703 // they all go in registers, but we must reserve stack space for them for 5704 // possible use by the caller. In varargs or 64-bit calls, parameters are 5705 // assigned stack space in order, with padding so Altivec parameters are 5706 // 16-byte aligned. 5707 unsigned nAltivecParamsAtEnd = 0; 5708 for (unsigned i = 0; i != NumOps; ++i) { 5709 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5710 EVT ArgVT = Outs[i].VT; 5711 // Varargs Altivec parameters are padded to a 16 byte boundary. 5712 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 5713 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 5714 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 5715 if (!isVarArg && !isPPC64) { 5716 // Non-varargs Altivec parameters go after all the non-Altivec 5717 // parameters; handle those later so we know how much padding we need. 5718 nAltivecParamsAtEnd++; 5719 continue; 5720 } 5721 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 5722 NumBytes = ((NumBytes+15)/16)*16; 5723 } 5724 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5725 } 5726 5727 // Allow for Altivec parameters at the end, if needed. 
5728 if (nAltivecParamsAtEnd) { 5729 NumBytes = ((NumBytes+15)/16)*16; 5730 NumBytes += 16*nAltivecParamsAtEnd; 5731 } 5732 5733 // The prolog code of the callee may store up to 8 GPR argument registers to 5734 // the stack, allowing va_start to index over them in memory if its varargs. 5735 // Because we cannot tell if this is needed on the caller side, we have to 5736 // conservatively assume that it is needed. As such, make sure we have at 5737 // least enough stack space for the caller to store the 8 GPRs. 5738 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5739 5740 // Tail call needs the stack to be aligned. 5741 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5742 CallConv == CallingConv::Fast) 5743 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5744 5745 // Calculate by how many bytes the stack has to be adjusted in case of tail 5746 // call optimization. 5747 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5748 5749 // To protect arguments on the stack from being clobbered in a tail call, 5750 // force all the loads to happen before doing any other lowering. 5751 if (isTailCall) 5752 Chain = DAG.getStackArgumentTokenFactor(Chain); 5753 5754 // Adjust the stack pointer for the new arguments... 5755 // These operations are automatically eliminated by the prolog/epilog pass 5756 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 5757 dl); 5758 SDValue CallSeqStart = Chain; 5759 5760 // Load the return address and frame pointer so it can be move somewhere else 5761 // later. 5762 SDValue LROp, FPOp; 5763 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5764 5765 // Set up a copy of the stack pointer for use loading and storing any 5766 // arguments that may not fit in the registers available for argument 5767 // passing. 
5768 SDValue StackPtr; 5769 if (isPPC64) 5770 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5771 else 5772 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 5773 5774 // Figure out which arguments are going to go in registers, and which in 5775 // memory. Also, if this is a vararg function, floating point operations 5776 // must be stored to our stack, and loaded into integer regs as well, if 5777 // any integer regs are available for argument passing. 5778 unsigned ArgOffset = LinkageSize; 5779 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5780 5781 static const MCPhysReg GPR_32[] = { // 32-bit registers. 5782 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 5783 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 5784 }; 5785 static const MCPhysReg GPR_64[] = { // 64-bit registers. 5786 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5787 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5788 }; 5789 static const MCPhysReg VR[] = { 5790 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5791 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5792 }; 5793 const unsigned NumGPRs = array_lengthof(GPR_32); 5794 const unsigned NumFPRs = 13; 5795 const unsigned NumVRs = array_lengthof(VR); 5796 5797 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 5798 5799 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5800 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5801 5802 SmallVector<SDValue, 8> MemOpChains; 5803 for (unsigned i = 0; i != NumOps; ++i) { 5804 SDValue Arg = OutVals[i]; 5805 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5806 5807 // PtrOff will be used to store the current argument to the stack if a 5808 // register cannot be found for it. 5809 SDValue PtrOff; 5810 5811 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 5812 5813 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5814 5815 // On PPC64, promote integers to 64-bit values. 5816 if (isPPC64 && Arg.getValueType() == MVT::i32) { 5817 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 
5818 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5819 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5820 } 5821 5822 // FIXME memcpy is used way more than necessary. Correctness first. 5823 // Note: "by value" is code for passing a structure by value, not 5824 // basic types. 5825 if (Flags.isByVal()) { 5826 unsigned Size = Flags.getByValSize(); 5827 // Very small objects are passed right-justified. Everything else is 5828 // passed left-justified. 5829 if (Size==1 || Size==2) { 5830 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 5831 if (GPR_idx != NumGPRs) { 5832 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5833 MachinePointerInfo(), VT); 5834 MemOpChains.push_back(Load.getValue(1)); 5835 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5836 5837 ArgOffset += PtrByteSize; 5838 } else { 5839 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5840 PtrOff.getValueType()); 5841 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5842 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5843 CallSeqStart, 5844 Flags, DAG, dl); 5845 ArgOffset += PtrByteSize; 5846 } 5847 continue; 5848 } 5849 // Copy entire object into memory. There are cases where gcc-generated 5850 // code assumes it is there, even if it could be put entirely into 5851 // registers. (This is not what the doc says.) 5852 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5853 CallSeqStart, 5854 Flags, DAG, dl); 5855 5856 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 5857 // copy the pieces of the object that fit into registers from the 5858 // parameter save area. 
5859 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5860 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5861 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5862 if (GPR_idx != NumGPRs) { 5863 SDValue Load = 5864 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 5865 MemOpChains.push_back(Load.getValue(1)); 5866 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5867 ArgOffset += PtrByteSize; 5868 } else { 5869 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5870 break; 5871 } 5872 } 5873 continue; 5874 } 5875 5876 switch (Arg.getSimpleValueType().SimpleTy) { 5877 default: llvm_unreachable("Unexpected ValueType for argument!"); 5878 case MVT::i1: 5879 case MVT::i32: 5880 case MVT::i64: 5881 if (GPR_idx != NumGPRs) { 5882 if (Arg.getValueType() == MVT::i1) 5883 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 5884 5885 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5886 } else { 5887 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5888 isPPC64, isTailCall, false, MemOpChains, 5889 TailCallArguments, dl); 5890 } 5891 ArgOffset += PtrByteSize; 5892 break; 5893 case MVT::f32: 5894 case MVT::f64: 5895 if (FPR_idx != NumFPRs) { 5896 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5897 5898 if (isVarArg) { 5899 SDValue Store = 5900 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5901 MemOpChains.push_back(Store); 5902 5903 // Float varargs are always shadowed in available integer registers 5904 if (GPR_idx != NumGPRs) { 5905 SDValue Load = 5906 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 5907 MemOpChains.push_back(Load.getValue(1)); 5908 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5909 } 5910 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 5911 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 5912 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5913 SDValue Load = 
5914 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 5915 MemOpChains.push_back(Load.getValue(1)); 5916 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5917 } 5918 } else { 5919 // If we have any FPRs remaining, we may also have GPRs remaining. 5920 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 5921 // GPRs. 5922 if (GPR_idx != NumGPRs) 5923 ++GPR_idx; 5924 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 5925 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 5926 ++GPR_idx; 5927 } 5928 } else 5929 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5930 isPPC64, isTailCall, false, MemOpChains, 5931 TailCallArguments, dl); 5932 if (isPPC64) 5933 ArgOffset += 8; 5934 else 5935 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 5936 break; 5937 case MVT::v4f32: 5938 case MVT::v4i32: 5939 case MVT::v8i16: 5940 case MVT::v16i8: 5941 if (isVarArg) { 5942 // These go aligned on the stack, or in the corresponding R registers 5943 // when within range. The Darwin PPC ABI doc claims they also go in 5944 // V registers; in fact gcc does this only for arguments that are 5945 // prototyped, not for those that match the ... We do it for all 5946 // arguments, seems to work. 5947 while (ArgOffset % 16 !=0) { 5948 ArgOffset += PtrByteSize; 5949 if (GPR_idx != NumGPRs) 5950 GPR_idx++; 5951 } 5952 // We could elide this store in the case where the object fits 5953 // entirely in R registers. Maybe later. 
5954 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 5955 DAG.getConstant(ArgOffset, dl, PtrVT)); 5956 SDValue Store = 5957 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5958 MemOpChains.push_back(Store); 5959 if (VR_idx != NumVRs) { 5960 SDValue Load = 5961 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 5962 MemOpChains.push_back(Load.getValue(1)); 5963 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5964 } 5965 ArgOffset += 16; 5966 for (unsigned i=0; i<16; i+=PtrByteSize) { 5967 if (GPR_idx == NumGPRs) 5968 break; 5969 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5970 DAG.getConstant(i, dl, PtrVT)); 5971 SDValue Load = 5972 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5973 MemOpChains.push_back(Load.getValue(1)); 5974 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5975 } 5976 break; 5977 } 5978 5979 // Non-varargs Altivec params generally go in registers, but have 5980 // stack space allocated at the end. 5981 if (VR_idx != NumVRs) { 5982 // Doesn't have GPR space allocated. 5983 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5984 } else if (nAltivecParamsAtEnd==0) { 5985 // We are emitting Altivec params in order. 5986 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5987 isPPC64, isTailCall, true, MemOpChains, 5988 TailCallArguments, dl); 5989 ArgOffset += 16; 5990 } 5991 break; 5992 } 5993 } 5994 // If all Altivec parameters fit in registers, as they usually do, 5995 // they get stack space following the non-Altivec parameters. We 5996 // don't track this here because nobody below needs it. 5997 // If there are more Altivec parameters than fit in registers emit 5998 // the stores here. 5999 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 6000 unsigned j = 0; 6001 // Offset is aligned; skip 1st 12 params which go in V registers. 
6002 ArgOffset = ((ArgOffset+15)/16)*16; 6003 ArgOffset += 12*16; 6004 for (unsigned i = 0; i != NumOps; ++i) { 6005 SDValue Arg = OutVals[i]; 6006 EVT ArgType = Outs[i].VT; 6007 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 6008 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 6009 if (++j > NumVRs) { 6010 SDValue PtrOff; 6011 // We are emitting Altivec params in order. 6012 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6013 isPPC64, isTailCall, true, MemOpChains, 6014 TailCallArguments, dl); 6015 ArgOffset += 16; 6016 } 6017 } 6018 } 6019 } 6020 6021 if (!MemOpChains.empty()) 6022 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 6023 6024 // On Darwin, R12 must contain the address of an indirect callee. This does 6025 // not mean the MTCTR instruction must use R12; it's easier to model this as 6026 // an extra parameter, so do that. 6027 if (!isTailCall && 6028 !isFunctionGlobalAddress(Callee) && 6029 !isa<ExternalSymbolSDNode>(Callee) && 6030 !isBLACompatibleAddress(Callee, DAG)) 6031 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 6032 PPC::R12), Callee)); 6033 6034 // Build a sequence of copy-to-reg nodes chained together with token chain 6035 // and flag operands which copy the outgoing args into the appropriate regs. 
6036 SDValue InFlag; 6037 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 6038 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 6039 RegsToPass[i].second, InFlag); 6040 InFlag = Chain.getValue(1); 6041 } 6042 6043 if (isTailCall) 6044 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 6045 TailCallArguments); 6046 6047 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 6048 /* unused except on PPC64 ELFv1 */ false, DAG, 6049 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 6050 NumBytes, Ins, InVals, CS); 6051 } 6052 6053 bool 6054 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 6055 MachineFunction &MF, bool isVarArg, 6056 const SmallVectorImpl<ISD::OutputArg> &Outs, 6057 LLVMContext &Context) const { 6058 SmallVector<CCValAssign, 16> RVLocs; 6059 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 6060 return CCInfo.CheckReturn(Outs, RetCC_PPC); 6061 } 6062 6063 SDValue 6064 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 6065 bool isVarArg, 6066 const SmallVectorImpl<ISD::OutputArg> &Outs, 6067 const SmallVectorImpl<SDValue> &OutVals, 6068 const SDLoc &dl, SelectionDAG &DAG) const { 6069 6070 SmallVector<CCValAssign, 16> RVLocs; 6071 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 6072 *DAG.getContext()); 6073 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 6074 6075 SDValue Flag; 6076 SmallVector<SDValue, 4> RetOps(1, Chain); 6077 6078 // Copy the result values into the output registers. 
6079 for (unsigned i = 0; i != RVLocs.size(); ++i) { 6080 CCValAssign &VA = RVLocs[i]; 6081 assert(VA.isRegLoc() && "Can only return in registers!"); 6082 6083 SDValue Arg = OutVals[i]; 6084 6085 switch (VA.getLocInfo()) { 6086 default: llvm_unreachable("Unknown loc info!"); 6087 case CCValAssign::Full: break; 6088 case CCValAssign::AExt: 6089 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 6090 break; 6091 case CCValAssign::ZExt: 6092 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 6093 break; 6094 case CCValAssign::SExt: 6095 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 6096 break; 6097 } 6098 6099 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 6100 Flag = Chain.getValue(1); 6101 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 6102 } 6103 6104 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6105 const MCPhysReg *I = 6106 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); 6107 if (I) { 6108 for (; *I; ++I) { 6109 6110 if (PPC::G8RCRegClass.contains(*I)) 6111 RetOps.push_back(DAG.getRegister(*I, MVT::i64)); 6112 else if (PPC::F8RCRegClass.contains(*I)) 6113 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); 6114 else if (PPC::CRRCRegClass.contains(*I)) 6115 RetOps.push_back(DAG.getRegister(*I, MVT::i1)); 6116 else if (PPC::VRRCRegClass.contains(*I)) 6117 RetOps.push_back(DAG.getRegister(*I, MVT::Other)); 6118 else 6119 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 6120 } 6121 } 6122 6123 RetOps[0] = Chain; // Update chain. 6124 6125 // Add the flag if we have it. 6126 if (Flag.getNode()) 6127 RetOps.push_back(Flag); 6128 6129 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); 6130 } 6131 6132 SDValue 6133 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, 6134 SelectionDAG &DAG) const { 6135 SDLoc dl(Op); 6136 6137 // Get the corect type for integers. 6138 EVT IntVT = Op.getValueType(); 6139 6140 // Get the inputs. 
6141 SDValue Chain = Op.getOperand(0); 6142 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 6143 // Build a DYNAREAOFFSET node. 6144 SDValue Ops[2] = {Chain, FPSIdx}; 6145 SDVTList VTs = DAG.getVTList(IntVT); 6146 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); 6147 } 6148 6149 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, 6150 SelectionDAG &DAG) const { 6151 // When we pop the dynamic allocation we need to restore the SP link. 6152 SDLoc dl(Op); 6153 6154 // Get the corect type for pointers. 6155 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6156 6157 // Construct the stack pointer operand. 6158 bool isPPC64 = Subtarget.isPPC64(); 6159 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 6160 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 6161 6162 // Get the operands for the STACKRESTORE. 6163 SDValue Chain = Op.getOperand(0); 6164 SDValue SaveSP = Op.getOperand(1); 6165 6166 // Load the old link SP. 6167 SDValue LoadLinkSP = 6168 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo()); 6169 6170 // Restore the stack pointer. 6171 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 6172 6173 // Store the old link SP. 6174 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo()); 6175 } 6176 6177 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { 6178 MachineFunction &MF = DAG.getMachineFunction(); 6179 bool isPPC64 = Subtarget.isPPC64(); 6180 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6181 6182 // Get current frame pointer save index. The users of this index will be 6183 // primarily DYNALLOC instructions. 6184 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 6185 int RASI = FI->getReturnAddrSaveIndex(); 6186 6187 // If the frame pointer save index hasn't been defined yet. 6188 if (!RASI) { 6189 // Find out what the fix offset of the frame pointer save area. 
6190 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset(); 6191 // Allocate the frame index for frame pointer save area. 6192 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false); 6193 // Save the result. 6194 FI->setReturnAddrSaveIndex(RASI); 6195 } 6196 return DAG.getFrameIndex(RASI, PtrVT); 6197 } 6198 6199 SDValue 6200 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 6201 MachineFunction &MF = DAG.getMachineFunction(); 6202 bool isPPC64 = Subtarget.isPPC64(); 6203 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6204 6205 // Get current frame pointer save index. The users of this index will be 6206 // primarily DYNALLOC instructions. 6207 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 6208 int FPSI = FI->getFramePointerSaveIndex(); 6209 6210 // If the frame pointer save index hasn't been defined yet. 6211 if (!FPSI) { 6212 // Find out what the fix offset of the frame pointer save area. 6213 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset(); 6214 // Allocate the frame index for frame pointer save area. 6215 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 6216 // Save the result. 6217 FI->setFramePointerSaveIndex(FPSI); 6218 } 6219 return DAG.getFrameIndex(FPSI, PtrVT); 6220 } 6221 6222 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 6223 SelectionDAG &DAG) const { 6224 // Get the inputs. 6225 SDValue Chain = Op.getOperand(0); 6226 SDValue Size = Op.getOperand(1); 6227 SDLoc dl(Op); 6228 6229 // Get the corect type for pointers. 6230 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6231 // Negate the size. 6232 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 6233 DAG.getConstant(0, dl, PtrVT), Size); 6234 // Construct a node for the frame pointer save index. 6235 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 6236 // Build a DYNALLOC node. 
6237 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 6238 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 6239 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); 6240 } 6241 6242 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op, 6243 SelectionDAG &DAG) const { 6244 MachineFunction &MF = DAG.getMachineFunction(); 6245 6246 bool isPPC64 = Subtarget.isPPC64(); 6247 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6248 6249 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false); 6250 return DAG.getFrameIndex(FI, PtrVT); 6251 } 6252 6253 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 6254 SelectionDAG &DAG) const { 6255 SDLoc DL(Op); 6256 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 6257 DAG.getVTList(MVT::i32, MVT::Other), 6258 Op.getOperand(0), Op.getOperand(1)); 6259 } 6260 6261 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 6262 SelectionDAG &DAG) const { 6263 SDLoc DL(Op); 6264 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 6265 Op.getOperand(0), Op.getOperand(1)); 6266 } 6267 6268 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 6269 if (Op.getValueType().isVector()) 6270 return LowerVectorLoad(Op, DAG); 6271 6272 assert(Op.getValueType() == MVT::i1 && 6273 "Custom lowering only for i1 loads"); 6274 6275 // First, load 8 bits into 32 bits, then truncate to 1 bit. 
6276 6277 SDLoc dl(Op); 6278 LoadSDNode *LD = cast<LoadSDNode>(Op); 6279 6280 SDValue Chain = LD->getChain(); 6281 SDValue BasePtr = LD->getBasePtr(); 6282 MachineMemOperand *MMO = LD->getMemOperand(); 6283 6284 SDValue NewLD = 6285 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, 6286 BasePtr, MVT::i8, MMO); 6287 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 6288 6289 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 6290 return DAG.getMergeValues(Ops, dl); 6291 } 6292 6293 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 6294 if (Op.getOperand(1).getValueType().isVector()) 6295 return LowerVectorStore(Op, DAG); 6296 6297 assert(Op.getOperand(1).getValueType() == MVT::i1 && 6298 "Custom lowering only for i1 stores"); 6299 6300 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 6301 6302 SDLoc dl(Op); 6303 StoreSDNode *ST = cast<StoreSDNode>(Op); 6304 6305 SDValue Chain = ST->getChain(); 6306 SDValue BasePtr = ST->getBasePtr(); 6307 SDValue Value = ST->getValue(); 6308 MachineMemOperand *MMO = ST->getMemOperand(); 6309 6310 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), 6311 Value); 6312 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 6313 } 6314 6315 // FIXME: Remove this once the ANDI glue bug is fixed: 6316 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 6317 assert(Op.getValueType() == MVT::i1 && 6318 "Custom lowering only for i1 results"); 6319 6320 SDLoc DL(Op); 6321 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 6322 Op.getOperand(0)); 6323 } 6324 6325 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 6326 /// possible. 6327 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 6328 // Not FP? Not a fsel. 
6329 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 6330 !Op.getOperand(2).getValueType().isFloatingPoint()) 6331 return Op; 6332 6333 // We might be able to do better than this under some circumstances, but in 6334 // general, fsel-based lowering of select is a finite-math-only optimization. 6335 // For more information, see section F.3 of the 2.06 ISA specification. 6336 if (!DAG.getTarget().Options.NoInfsFPMath || 6337 !DAG.getTarget().Options.NoNaNsFPMath) 6338 return Op; 6339 // TODO: Propagate flags from the select rather than global settings. 6340 SDNodeFlags Flags; 6341 Flags.setNoInfs(true); 6342 Flags.setNoNaNs(true); 6343 6344 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 6345 6346 EVT ResVT = Op.getValueType(); 6347 EVT CmpVT = Op.getOperand(0).getValueType(); 6348 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6349 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 6350 SDLoc dl(Op); 6351 6352 // If the RHS of the comparison is a 0.0, we don't need to do the 6353 // subtraction at all. 6354 SDValue Sel1; 6355 if (isFloatingPointZero(RHS)) 6356 switch (CC) { 6357 default: break; // SETUO etc aren't handled by fsel. 
6358 case ISD::SETNE: 6359 std::swap(TV, FV); 6360 case ISD::SETEQ: 6361 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6362 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6363 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6364 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6365 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6366 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6367 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 6368 case ISD::SETULT: 6369 case ISD::SETLT: 6370 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6371 case ISD::SETOGE: 6372 case ISD::SETGE: 6373 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6374 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6375 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6376 case ISD::SETUGT: 6377 case ISD::SETGT: 6378 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6379 case ISD::SETOLE: 6380 case ISD::SETLE: 6381 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6382 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6383 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6384 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 6385 } 6386 6387 SDValue Cmp; 6388 switch (CC) { 6389 default: break; // SETUO etc aren't handled by fsel. 
6390 case ISD::SETNE: 6391 std::swap(TV, FV); 6392 case ISD::SETEQ: 6393 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6394 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6395 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6396 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6397 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6398 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6399 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6400 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 6401 case ISD::SETULT: 6402 case ISD::SETLT: 6403 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6404 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6405 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6406 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6407 case ISD::SETOGE: 6408 case ISD::SETGE: 6409 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6410 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6411 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6412 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6413 case ISD::SETUGT: 6414 case ISD::SETGT: 6415 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6416 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6417 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6418 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6419 case ISD::SETOLE: 6420 case ISD::SETLE: 6421 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6422 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6423 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6424 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6425 } 6426 return Op; 6427 } 6428 6429 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 6430 SelectionDAG &DAG, 6431 const SDLoc &dl) const { 6432 
assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6433 SDValue Src = Op.getOperand(0); 6434 if (Src.getValueType() == MVT::f32) 6435 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6436 6437 SDValue Tmp; 6438 switch (Op.getSimpleValueType().SimpleTy) { 6439 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6440 case MVT::i32: 6441 Tmp = DAG.getNode( 6442 Op.getOpcode() == ISD::FP_TO_SINT 6443 ? PPCISD::FCTIWZ 6444 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6445 dl, MVT::f64, Src); 6446 break; 6447 case MVT::i64: 6448 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6449 "i64 FP_TO_UINT is supported only with FPCVT"); 6450 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 6451 PPCISD::FCTIDUZ, 6452 dl, MVT::f64, Src); 6453 break; 6454 } 6455 6456 // Convert the FP value to an int value through memory. 6457 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 6458 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 6459 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 6460 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 6461 MachinePointerInfo MPI = 6462 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); 6463 6464 // Emit a store to the stack slot. 6465 SDValue Chain; 6466 if (i32Stack) { 6467 MachineFunction &MF = DAG.getMachineFunction(); 6468 MachineMemOperand *MMO = 6469 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 6470 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 6471 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 6472 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 6473 } else 6474 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); 6475 6476 // Result is a load from the stack slot. If loading 4 bytes, make sure to 6477 // add in a bias on big endian. 
6478 if (Op.getValueType() == MVT::i32 && !i32Stack) { 6479 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 6480 DAG.getConstant(4, dl, FIPtr.getValueType())); 6481 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); 6482 } 6483 6484 RLI.Chain = Chain; 6485 RLI.Ptr = FIPtr; 6486 RLI.MPI = MPI; 6487 } 6488 6489 /// \brief Custom lowers floating point to integer conversions to use 6490 /// the direct move instructions available in ISA 2.07 to avoid the 6491 /// need for load/store combinations. 6492 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, 6493 SelectionDAG &DAG, 6494 const SDLoc &dl) const { 6495 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6496 SDValue Src = Op.getOperand(0); 6497 6498 if (Src.getValueType() == MVT::f32) 6499 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6500 6501 SDValue Tmp; 6502 switch (Op.getSimpleValueType().SimpleTy) { 6503 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6504 case MVT::i32: 6505 Tmp = DAG.getNode( 6506 Op.getOpcode() == ISD::FP_TO_SINT 6507 ? PPCISD::FCTIWZ 6508 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6509 dl, MVT::f64, Src); 6510 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); 6511 break; 6512 case MVT::i64: 6513 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6514 "i64 FP_TO_UINT is supported only with FPCVT"); 6515 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? 
PPCISD::FCTIDZ : 6516 PPCISD::FCTIDUZ, 6517 dl, MVT::f64, Src); 6518 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); 6519 break; 6520 } 6521 return Tmp; 6522 } 6523 6524 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 6525 const SDLoc &dl) const { 6526 if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) 6527 return LowerFP_TO_INTDirectMove(Op, DAG, dl); 6528 6529 ReuseLoadInfo RLI; 6530 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6531 6532 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, 6533 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); 6534 } 6535 6536 // We're trying to insert a regular store, S, and then a load, L. If the 6537 // incoming value, O, is a load, we might just be able to have our load use the 6538 // address used by O. However, we don't know if anything else will store to 6539 // that address before we can load from it. To prevent this situation, we need 6540 // to insert our load, L, into the chain as a peer of O. To do this, we give L 6541 // the same chain operand as O, we create a token factor from the chain results 6542 // of O and L, and we replace all uses of O's chain result with that token 6543 // factor (see spliceIntoChain below for this last part). 
6544 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 6545 ReuseLoadInfo &RLI, 6546 SelectionDAG &DAG, 6547 ISD::LoadExtType ET) const { 6548 SDLoc dl(Op); 6549 if (ET == ISD::NON_EXTLOAD && 6550 (Op.getOpcode() == ISD::FP_TO_UINT || 6551 Op.getOpcode() == ISD::FP_TO_SINT) && 6552 isOperationLegalOrCustom(Op.getOpcode(), 6553 Op.getOperand(0).getValueType())) { 6554 6555 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6556 return true; 6557 } 6558 6559 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 6560 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 6561 LD->isNonTemporal()) 6562 return false; 6563 if (LD->getMemoryVT() != MemVT) 6564 return false; 6565 6566 RLI.Ptr = LD->getBasePtr(); 6567 if (LD->isIndexed() && !LD->getOffset().isUndef()) { 6568 assert(LD->getAddressingMode() == ISD::PRE_INC && 6569 "Non-pre-inc AM on PPC?"); 6570 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 6571 LD->getOffset()); 6572 } 6573 6574 RLI.Chain = LD->getChain(); 6575 RLI.MPI = LD->getPointerInfo(); 6576 RLI.IsDereferenceable = LD->isDereferenceable(); 6577 RLI.IsInvariant = LD->isInvariant(); 6578 RLI.Alignment = LD->getAlignment(); 6579 RLI.AAInfo = LD->getAAInfo(); 6580 RLI.Ranges = LD->getRanges(); 6581 6582 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 6583 return true; 6584 } 6585 6586 // Given the head of the old chain, ResChain, insert a token factor containing 6587 // it and NewResChain, and make users of ResChain now be users of that token 6588 // factor. 
6589 void PPCTargetLowering::spliceIntoChain(SDValue ResChain, 6590 SDValue NewResChain, 6591 SelectionDAG &DAG) const { 6592 if (!ResChain) 6593 return; 6594 6595 SDLoc dl(NewResChain); 6596 6597 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 6598 NewResChain, DAG.getUNDEF(MVT::Other)); 6599 assert(TF.getNode() != NewResChain.getNode() && 6600 "A new TF really is required here"); 6601 6602 DAG.ReplaceAllUsesOfValueWith(ResChain, TF); 6603 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); 6604 } 6605 6606 /// \brief Analyze profitability of direct move 6607 /// prefer float load to int load plus direct move 6608 /// when there is no integer use of int load 6609 static bool directMoveIsProfitable(const SDValue &Op) { 6610 SDNode *Origin = Op.getOperand(0).getNode(); 6611 if (Origin->getOpcode() != ISD::LOAD) 6612 return true; 6613 6614 for (SDNode::use_iterator UI = Origin->use_begin(), 6615 UE = Origin->use_end(); 6616 UI != UE; ++UI) { 6617 6618 // Only look at the users of the loaded value. 6619 if (UI.getUse().get().getResNo() != 0) 6620 continue; 6621 6622 if (UI->getOpcode() != ISD::SINT_TO_FP && 6623 UI->getOpcode() != ISD::UINT_TO_FP) 6624 return true; 6625 } 6626 6627 return false; 6628 } 6629 6630 /// \brief Custom lowers integer to floating point conversions to use 6631 /// the direct move instructions available in ISA 2.07 to avoid the 6632 /// need for load/store combinations. 
6633 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, 6634 SelectionDAG &DAG, 6635 const SDLoc &dl) const { 6636 assert((Op.getValueType() == MVT::f32 || 6637 Op.getValueType() == MVT::f64) && 6638 "Invalid floating point type as target of conversion"); 6639 assert(Subtarget.hasFPCVT() && 6640 "Int to FP conversions with direct moves require FPCVT"); 6641 SDValue FP; 6642 SDValue Src = Op.getOperand(0); 6643 bool SinglePrec = Op.getValueType() == MVT::f32; 6644 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; 6645 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; 6646 unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : 6647 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); 6648 6649 if (WordInt) { 6650 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, 6651 dl, MVT::f64, Src); 6652 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6653 } 6654 else { 6655 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); 6656 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6657 } 6658 6659 return FP; 6660 } 6661 6662 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 6663 SelectionDAG &DAG) const { 6664 SDLoc dl(Op); 6665 6666 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { 6667 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) 6668 return SDValue(); 6669 6670 SDValue Value = Op.getOperand(0); 6671 // The values are now known to be -1 (false) or 1 (true). To convert this 6672 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 
6673 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 6674 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 6675 6676 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 6677 6678 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 6679 6680 if (Op.getValueType() != MVT::v4f64) 6681 Value = DAG.getNode(ISD::FP_ROUND, dl, 6682 Op.getValueType(), Value, 6683 DAG.getIntPtrConstant(1, dl)); 6684 return Value; 6685 } 6686 6687 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 6688 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 6689 return SDValue(); 6690 6691 if (Op.getOperand(0).getValueType() == MVT::i1) 6692 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 6693 DAG.getConstantFP(1.0, dl, Op.getValueType()), 6694 DAG.getConstantFP(0.0, dl, Op.getValueType())); 6695 6696 // If we have direct moves, we can do all the conversion, skip the store/load 6697 // however, without FPCVT we can't do most conversions. 6698 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && 6699 Subtarget.isPPC64() && Subtarget.hasFPCVT()) 6700 return LowerINT_TO_FPDirectMove(Op, DAG, dl); 6701 6702 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 6703 "UINT_TO_FP is supported only with FPCVT"); 6704 6705 // If we have FCFIDS, then use it when converting to single-precision. 6706 // Otherwise, convert to double-precision and then round. 6707 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6708 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 6709 : PPCISD::FCFIDS) 6710 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 6711 : PPCISD::FCFID); 6712 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6713 ? 
MVT::f32 6714 : MVT::f64; 6715 6716 if (Op.getOperand(0).getValueType() == MVT::i64) { 6717 SDValue SINT = Op.getOperand(0); 6718 // When converting to single-precision, we actually need to convert 6719 // to double-precision first and then round to single-precision. 6720 // To avoid double-rounding effects during that operation, we have 6721 // to prepare the input operand. Bits that might be truncated when 6722 // converting to double-precision are replaced by a bit that won't 6723 // be lost at this stage, but is below the single-precision rounding 6724 // position. 6725 // 6726 // However, if -enable-unsafe-fp-math is in effect, accept double 6727 // rounding to avoid the extra overhead. 6728 if (Op.getValueType() == MVT::f32 && 6729 !Subtarget.hasFPCVT() && 6730 !DAG.getTarget().Options.UnsafeFPMath) { 6731 6732 // Twiddle input to make sure the low 11 bits are zero. (If this 6733 // is the case, we are guaranteed the value will fit into the 53 bit 6734 // mantissa of an IEEE double-precision value without rounding.) 6735 // If any of those low 11 bits were not zero originally, make sure 6736 // bit 12 (value 2048) is set instead, so that the final rounding 6737 // to single-precision gets the correct result. 6738 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6739 SINT, DAG.getConstant(2047, dl, MVT::i64)); 6740 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 6741 Round, DAG.getConstant(2047, dl, MVT::i64)); 6742 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 6743 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6744 Round, DAG.getConstant(-2048, dl, MVT::i64)); 6745 6746 // However, we cannot use that value unconditionally: if the magnitude 6747 // of the input value is small, the bit-twiddling we did above might 6748 // end up visibly changing the output. Fortunately, in that case, we 6749 // don't need to twiddle bits since the original input will convert 6750 // exactly to double-precision floating-point already. 
Therefore, 6751 // construct a conditional to use the original value if the top 11 6752 // bits are all sign-bit copies, and use the rounded value computed 6753 // above otherwise. 6754 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 6755 SINT, DAG.getConstant(53, dl, MVT::i32)); 6756 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 6757 Cond, DAG.getConstant(1, dl, MVT::i64)); 6758 Cond = DAG.getSetCC(dl, MVT::i32, 6759 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); 6760 6761 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 6762 } 6763 6764 ReuseLoadInfo RLI; 6765 SDValue Bits; 6766 6767 MachineFunction &MF = DAG.getMachineFunction(); 6768 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 6769 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, 6770 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); 6771 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6772 } else if (Subtarget.hasLFIWAX() && 6773 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 6774 MachineMemOperand *MMO = 6775 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6776 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6777 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6778 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 6779 DAG.getVTList(MVT::f64, MVT::Other), 6780 Ops, MVT::i32, MMO); 6781 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6782 } else if (Subtarget.hasFPCVT() && 6783 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 6784 MachineMemOperand *MMO = 6785 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6786 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6787 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6788 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 6789 DAG.getVTList(MVT::f64, MVT::Other), 6790 Ops, MVT::i32, MMO); 6791 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6792 } else if (((Subtarget.hasLFIWAX() && 6793 SINT.getOpcode() == ISD::SIGN_EXTEND) || 6794 (Subtarget.hasFPCVT() && 6795 
SINT.getOpcode() == ISD::ZERO_EXTEND)) && 6796 SINT.getOperand(0).getValueType() == MVT::i32) { 6797 MachineFrameInfo &MFI = MF.getFrameInfo(); 6798 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6799 6800 int FrameIdx = MFI.CreateStackObject(4, 4, false); 6801 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6802 6803 SDValue Store = 6804 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 6805 MachinePointerInfo::getFixedStack( 6806 DAG.getMachineFunction(), FrameIdx)); 6807 6808 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6809 "Expected an i32 store"); 6810 6811 RLI.Ptr = FIdx; 6812 RLI.Chain = Store; 6813 RLI.MPI = 6814 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6815 RLI.Alignment = 4; 6816 6817 MachineMemOperand *MMO = 6818 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6819 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6820 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6821 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 6822 PPCISD::LFIWZX : PPCISD::LFIWAX, 6823 dl, DAG.getVTList(MVT::f64, MVT::Other), 6824 Ops, MVT::i32, MMO); 6825 } else 6826 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 6827 6828 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 6829 6830 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6831 FP = DAG.getNode(ISD::FP_ROUND, dl, 6832 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 6833 return FP; 6834 } 6835 6836 assert(Op.getOperand(0).getValueType() == MVT::i32 && 6837 "Unhandled INT_TO_FP type in custom expander!"); 6838 // Since we only generate this in 64-bit mode, we can take advantage of 6839 // 64-bit registers. In particular, sign extend the input value into the 6840 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 6841 // then lfd it and fcfid it. 
6842 MachineFunction &MF = DAG.getMachineFunction(); 6843 MachineFrameInfo &MFI = MF.getFrameInfo(); 6844 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6845 6846 SDValue Ld; 6847 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 6848 ReuseLoadInfo RLI; 6849 bool ReusingLoad; 6850 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, 6851 DAG))) { 6852 int FrameIdx = MFI.CreateStackObject(4, 4, false); 6853 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6854 6855 SDValue Store = 6856 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 6857 MachinePointerInfo::getFixedStack( 6858 DAG.getMachineFunction(), FrameIdx)); 6859 6860 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6861 "Expected an i32 store"); 6862 6863 RLI.Ptr = FIdx; 6864 RLI.Chain = Store; 6865 RLI.MPI = 6866 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6867 RLI.Alignment = 4; 6868 } 6869 6870 MachineMemOperand *MMO = 6871 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6872 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6873 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6874 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 6875 PPCISD::LFIWZX : PPCISD::LFIWAX, 6876 dl, DAG.getVTList(MVT::f64, MVT::Other), 6877 Ops, MVT::i32, MMO); 6878 if (ReusingLoad) 6879 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); 6880 } else { 6881 assert(Subtarget.isPPC64() && 6882 "i32->FP without LFIWAX supported only on PPC64"); 6883 6884 int FrameIdx = MFI.CreateStackObject(8, 8, false); 6885 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6886 6887 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 6888 Op.getOperand(0)); 6889 6890 // STD the extended value into the stack slot. 6891 SDValue Store = DAG.getStore( 6892 DAG.getEntryNode(), dl, Ext64, FIdx, 6893 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 6894 6895 // Load the value as a double. 
6896 Ld = DAG.getLoad( 6897 MVT::f64, dl, Store, FIdx, 6898 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 6899 } 6900 6901 // FCFID it and return it. 6902 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 6903 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6904 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, 6905 DAG.getIntPtrConstant(0, dl)); 6906 return FP; 6907 } 6908 6909 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 6910 SelectionDAG &DAG) const { 6911 SDLoc dl(Op); 6912 /* 6913 The rounding mode is in bits 30:31 of FPSR, and has the following 6914 settings: 6915 00 Round to nearest 6916 01 Round to 0 6917 10 Round to +inf 6918 11 Round to -inf 6919 6920 FLT_ROUNDS, on the other hand, expects the following: 6921 -1 Undefined 6922 0 Round to 0 6923 1 Round to nearest 6924 2 Round to +inf 6925 3 Round to -inf 6926 6927 To perform the conversion, we do: 6928 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 6929 */ 6930 6931 MachineFunction &MF = DAG.getMachineFunction(); 6932 EVT VT = Op.getValueType(); 6933 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6934 6935 // Save FP Control Word to register 6936 EVT NodeTys[] = { 6937 MVT::f64, // return register 6938 MVT::Glue // unused in this context 6939 }; 6940 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); 6941 6942 // Save FP register to stack slot 6943 int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false); 6944 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 6945 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, 6946 MachinePointerInfo()); 6947 6948 // Load FP Control Word from low 32 bits of stack slot. 
6949 SDValue Four = DAG.getConstant(4, dl, PtrVT); 6950 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 6951 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); 6952 6953 // Transform as necessary 6954 SDValue CWD1 = 6955 DAG.getNode(ISD::AND, dl, MVT::i32, 6956 CWD, DAG.getConstant(3, dl, MVT::i32)); 6957 SDValue CWD2 = 6958 DAG.getNode(ISD::SRL, dl, MVT::i32, 6959 DAG.getNode(ISD::AND, dl, MVT::i32, 6960 DAG.getNode(ISD::XOR, dl, MVT::i32, 6961 CWD, DAG.getConstant(3, dl, MVT::i32)), 6962 DAG.getConstant(3, dl, MVT::i32)), 6963 DAG.getConstant(1, dl, MVT::i32)); 6964 6965 SDValue RetVal = 6966 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 6967 6968 return DAG.getNode((VT.getSizeInBits() < 16 ? 6969 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 6970 } 6971 6972 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 6973 EVT VT = Op.getValueType(); 6974 unsigned BitWidth = VT.getSizeInBits(); 6975 SDLoc dl(Op); 6976 assert(Op.getNumOperands() == 3 && 6977 VT == Op.getOperand(1).getValueType() && 6978 "Unexpected SHL!"); 6979 6980 // Expand into a bunch of logical ops. Note that these ops 6981 // depend on the PPC behavior for oversized shift amounts. 
6982 SDValue Lo = Op.getOperand(0); 6983 SDValue Hi = Op.getOperand(1); 6984 SDValue Amt = Op.getOperand(2); 6985 EVT AmtVT = Amt.getValueType(); 6986 6987 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6988 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6989 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 6990 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 6991 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 6992 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6993 DAG.getConstant(-BitWidth, dl, AmtVT)); 6994 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 6995 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 6996 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 6997 SDValue OutOps[] = { OutLo, OutHi }; 6998 return DAG.getMergeValues(OutOps, dl); 6999 } 7000 7001 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 7002 EVT VT = Op.getValueType(); 7003 SDLoc dl(Op); 7004 unsigned BitWidth = VT.getSizeInBits(); 7005 assert(Op.getNumOperands() == 3 && 7006 VT == Op.getOperand(1).getValueType() && 7007 "Unexpected SRL!"); 7008 7009 // Expand into a bunch of logical ops. Note that these ops 7010 // depend on the PPC behavior for oversized shift amounts. 
7011 SDValue Lo = Op.getOperand(0); 7012 SDValue Hi = Op.getOperand(1); 7013 SDValue Amt = Op.getOperand(2); 7014 EVT AmtVT = Amt.getValueType(); 7015 7016 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7017 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7018 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 7019 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 7020 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 7021 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7022 DAG.getConstant(-BitWidth, dl, AmtVT)); 7023 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 7024 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 7025 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 7026 SDValue OutOps[] = { OutLo, OutHi }; 7027 return DAG.getMergeValues(OutOps, dl); 7028 } 7029 7030 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 7031 SDLoc dl(Op); 7032 EVT VT = Op.getValueType(); 7033 unsigned BitWidth = VT.getSizeInBits(); 7034 assert(Op.getNumOperands() == 3 && 7035 VT == Op.getOperand(1).getValueType() && 7036 "Unexpected SRA!"); 7037 7038 // Expand into a bunch of logical ops, followed by a select_cc. 
7039 SDValue Lo = Op.getOperand(0); 7040 SDValue Hi = Op.getOperand(1); 7041 SDValue Amt = Op.getOperand(2); 7042 EVT AmtVT = Amt.getValueType(); 7043 7044 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7045 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7046 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 7047 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 7048 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 7049 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7050 DAG.getConstant(-BitWidth, dl, AmtVT)); 7051 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 7052 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 7053 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), 7054 Tmp4, Tmp6, ISD::SETLE); 7055 SDValue OutOps[] = { OutLo, OutHi }; 7056 return DAG.getMergeValues(OutOps, dl); 7057 } 7058 7059 //===----------------------------------------------------------------------===// 7060 // Vector related lowering. 7061 // 7062 7063 /// BuildSplatI - Build a canonical splati of Val with an element size of 7064 /// SplatSize. Cast the result to VT. 7065 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 7066 SelectionDAG &DAG, const SDLoc &dl) { 7067 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 7068 7069 static const MVT VTys[] = { // canonical VT to use for each size. 7070 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 7071 }; 7072 7073 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 7074 7075 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 7076 if (Val == -1) 7077 SplatSize = 1; 7078 7079 EVT CanonicalVT = VTys[SplatSize-1]; 7080 7081 // Build a canonical splat for this value. 7082 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); 7083 } 7084 7085 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 7086 /// specified intrinsic ID. 
7087 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, 7088 const SDLoc &dl, EVT DestVT = MVT::Other) { 7089 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 7090 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7091 DAG.getConstant(IID, dl, MVT::i32), Op); 7092 } 7093 7094 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 7095 /// specified intrinsic ID. 7096 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 7097 SelectionDAG &DAG, const SDLoc &dl, 7098 EVT DestVT = MVT::Other) { 7099 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 7100 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7101 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); 7102 } 7103 7104 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 7105 /// specified intrinsic ID. 7106 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 7107 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, 7108 EVT DestVT = MVT::Other) { 7109 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 7110 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7111 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); 7112 } 7113 7114 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 7115 /// amount. The result has the specified value type. 7116 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, 7117 SelectionDAG &DAG, const SDLoc &dl) { 7118 // Force LHS/RHS to be the right type. 7119 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 7120 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 7121 7122 int Ops[16]; 7123 for (unsigned i = 0; i != 16; ++i) 7124 Ops[i] = i + Amt; 7125 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 7126 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7127 } 7128 7129 /// Do we have an efficient pattern in a .td file for this node? 
7130 /// 7131 /// \param V - pointer to the BuildVectorSDNode being matched 7132 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves? 7133 /// 7134 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR 7135 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where 7136 /// the opposite is true (expansion is beneficial) are: 7137 /// - The node builds a vector out of integers that are not 32 or 64-bits 7138 /// - The node builds a vector out of constants 7139 /// - The node is a "load-and-splat" 7140 /// In all other cases, we will choose to keep the BUILD_VECTOR. 7141 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, 7142 bool HasDirectMove) { 7143 EVT VecVT = V->getValueType(0); 7144 bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 || 7145 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32)); 7146 if (!RightType) 7147 return false; 7148 7149 bool IsSplat = true; 7150 bool IsLoad = false; 7151 SDValue Op0 = V->getOperand(0); 7152 7153 // This function is called in a block that confirms the node is not a constant 7154 // splat. So a constant BUILD_VECTOR here means the vector is built out of 7155 // different constants. 7156 if (V->isConstant()) 7157 return false; 7158 for (int i = 0, e = V->getNumOperands(); i < e; ++i) { 7159 if (V->getOperand(i).isUndef()) 7160 return false; 7161 // We want to expand nodes that represent load-and-splat even if the 7162 // loaded value is a floating point truncation or conversion to int. 
7163 if (V->getOperand(i).getOpcode() == ISD::LOAD || 7164 (V->getOperand(i).getOpcode() == ISD::FP_ROUND && 7165 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || 7166 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT && 7167 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || 7168 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT && 7169 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD)) 7170 IsLoad = true; 7171 // If the operands are different or the input is not a load and has more 7172 // uses than just this BV node, then it isn't a splat. 7173 if (V->getOperand(i) != Op0 || 7174 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode()))) 7175 IsSplat = false; 7176 } 7177 return !(IsSplat && IsLoad); 7178 } 7179 7180 // If this is a case we can't handle, return null and let the default 7181 // expansion code take care of it. If we CAN select this case, and if it 7182 // selects to a single instruction, return Op. Otherwise, if we can codegen 7183 // this case more efficiently than a constant pool load, lower it to the 7184 // sequence of ops that should be used. 7185 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 7186 SelectionDAG &DAG) const { 7187 SDLoc dl(Op); 7188 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 7189 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 7190 7191 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { 7192 // We first build an i32 vector, load it into a QPX register, 7193 // then convert it to a floating-point vector and compare it 7194 // to a zero vector to get the boolean result. 
7195 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 7196 int FrameIdx = MFI.CreateStackObject(16, 16, false); 7197 MachinePointerInfo PtrInfo = 7198 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7199 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7200 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7201 7202 assert(BVN->getNumOperands() == 4 && 7203 "BUILD_VECTOR for v4i1 does not have 4 operands"); 7204 7205 bool IsConst = true; 7206 for (unsigned i = 0; i < 4; ++i) { 7207 if (BVN->getOperand(i).isUndef()) continue; 7208 if (!isa<ConstantSDNode>(BVN->getOperand(i))) { 7209 IsConst = false; 7210 break; 7211 } 7212 } 7213 7214 if (IsConst) { 7215 Constant *One = 7216 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); 7217 Constant *NegOne = 7218 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); 7219 7220 Constant *CV[4]; 7221 for (unsigned i = 0; i < 4; ++i) { 7222 if (BVN->getOperand(i).isUndef()) 7223 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); 7224 else if (isNullConstant(BVN->getOperand(i))) 7225 CV[i] = NegOne; 7226 else 7227 CV[i] = One; 7228 } 7229 7230 Constant *CP = ConstantVector::get(CV); 7231 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), 7232 16 /* alignment */); 7233 7234 SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; 7235 SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other}); 7236 return DAG.getMemIntrinsicNode( 7237 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, 7238 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 7239 } 7240 7241 SmallVector<SDValue, 4> Stores; 7242 for (unsigned i = 0; i < 4; ++i) { 7243 if (BVN->getOperand(i).isUndef()) continue; 7244 7245 unsigned Offset = 4*i; 7246 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 7247 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 7248 7249 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); 7250 if (StoreSize > 4) { 
7251 Stores.push_back( 7252 DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx, 7253 PtrInfo.getWithOffset(Offset), MVT::i32)); 7254 } else { 7255 SDValue StoreValue = BVN->getOperand(i); 7256 if (StoreSize < 4) 7257 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); 7258 7259 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx, 7260 PtrInfo.getWithOffset(Offset))); 7261 } 7262 } 7263 7264 SDValue StoreChain; 7265 if (!Stores.empty()) 7266 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 7267 else 7268 StoreChain = DAG.getEntryNode(); 7269 7270 // Now load from v4i32 into the QPX register; this will extend it to 7271 // v4i64 but not yet convert it to a floating point. Nevertheless, this 7272 // is typed as v4f64 because the QPX register integer states are not 7273 // explicitly represented. 7274 7275 SDValue Ops[] = {StoreChain, 7276 DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32), 7277 FIdx}; 7278 SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other}); 7279 7280 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, 7281 dl, VTs, Ops, MVT::v4i32, PtrInfo); 7282 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7283 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), 7284 LoadedVect); 7285 7286 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); 7287 7288 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); 7289 } 7290 7291 // All other QPX vectors are handled by generic code. 7292 if (Subtarget.hasQPX()) 7293 return SDValue(); 7294 7295 // Check if this is a splat of a constant value. 7296 APInt APSplatBits, APSplatUndef; 7297 unsigned SplatBitSize; 7298 bool HasAnyUndefs; 7299 if (! 
BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 7300 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || 7301 SplatBitSize > 32) { 7302 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be 7303 // lowered to VSX instructions under certain conditions. 7304 // Without VSX, there is no pattern more efficient than expanding the node. 7305 if (Subtarget.hasVSX() && 7306 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove())) 7307 return Op; 7308 return SDValue(); 7309 } 7310 7311 unsigned SplatBits = APSplatBits.getZExtValue(); 7312 unsigned SplatUndef = APSplatUndef.getZExtValue(); 7313 unsigned SplatSize = SplatBitSize / 8; 7314 7315 // First, handle single instruction cases. 7316 7317 // All zeros? 7318 if (SplatBits == 0) { 7319 // Canonicalize all zero vectors to be v4i32. 7320 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 7321 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); 7322 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 7323 } 7324 return Op; 7325 } 7326 7327 // We have XXSPLTIB for constant splats one byte wide 7328 if (Subtarget.hasP9Vector() && SplatSize == 1) { 7329 // This is a splat of 1-byte elements with some elements potentially undef. 7330 // Rather than trying to match undef in the SDAG patterns, ensure that all 7331 // elements are the same constant. 7332 if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) { 7333 SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits, 7334 dl, MVT::i32)); 7335 SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops); 7336 if (Op.getValueType() != MVT::v16i8) 7337 return DAG.getBitcast(Op.getValueType(), NewBV); 7338 return NewBV; 7339 } 7340 return Op; 7341 } 7342 7343 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 
7344 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 7345 (32-SplatBitSize)); 7346 if (SextVal >= -16 && SextVal <= 15) 7347 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 7348 7349 // Two instruction sequences. 7350 7351 // If this value is in the range [-32,30] and is even, use: 7352 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 7353 // If this value is in the range [17,31] and is odd, use: 7354 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 7355 // If this value is in the range [-31,-17] and is odd, use: 7356 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 7357 // Note the last two are three-instruction sequences. 7358 if (SextVal >= -32 && SextVal <= 31) { 7359 // To avoid having these optimizations undone by constant folding, 7360 // we convert to a pseudo that will be expanded later into one of 7361 // the above forms. 7362 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); 7363 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 7364 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); 7365 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); 7366 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 7367 if (VT == Op.getValueType()) 7368 return RetVal; 7369 else 7370 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 7371 } 7372 7373 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 7374 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 7375 // for fneg/fabs. 7376 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 7377 // Make -1 and vspltisw -1: 7378 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 7379 7380 // Make the VSLW intrinsic, computing 0x8000_0000. 7381 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 7382 OnesV, DAG, dl); 7383 7384 // xor by OnesV to invert it. 
7385 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 7386 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7387 } 7388 7389 // Check to see if this is a wide variety of vsplti*, binop self cases. 7390 static const signed char SplatCsts[] = { 7391 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 7392 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 7393 }; 7394 7395 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 7396 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 7397 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 7398 int i = SplatCsts[idx]; 7399 7400 // Figure out what shift amount will be used by altivec if shifted by i in 7401 // this splat size. 7402 unsigned TypeShiftAmt = i & (SplatBitSize-1); 7403 7404 // vsplti + shl self. 7405 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 7406 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7407 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7408 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 7409 Intrinsic::ppc_altivec_vslw 7410 }; 7411 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7412 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7413 } 7414 7415 // vsplti + srl self. 7416 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 7417 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7418 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7419 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 7420 Intrinsic::ppc_altivec_vsrw 7421 }; 7422 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7423 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7424 } 7425 7426 // vsplti + sra self. 
7427 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 7428 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7429 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7430 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 7431 Intrinsic::ppc_altivec_vsraw 7432 }; 7433 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7434 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7435 } 7436 7437 // vsplti + rol self. 7438 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 7439 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 7440 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7441 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7442 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 7443 Intrinsic::ppc_altivec_vrlw 7444 }; 7445 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7446 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7447 } 7448 7449 // t = vsplti c, result = vsldoi t, t, 1 7450 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 7451 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7452 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; 7453 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7454 } 7455 // t = vsplti c, result = vsldoi t, t, 2 7456 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 7457 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7458 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; 7459 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7460 } 7461 // t = vsplti c, result = vsldoi t, t, 3 7462 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 7463 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7464 unsigned Amt = Subtarget.isLittleEndian() ? 
13 : 3; 7465 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7466 } 7467 } 7468 7469 return SDValue(); 7470 } 7471 7472 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 7473 /// the specified operations to build the shuffle. 7474 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 7475 SDValue RHS, SelectionDAG &DAG, 7476 const SDLoc &dl) { 7477 unsigned OpNum = (PFEntry >> 26) & 0x0F; 7478 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 7479 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 7480 7481 enum { 7482 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 7483 OP_VMRGHW, 7484 OP_VMRGLW, 7485 OP_VSPLTISW0, 7486 OP_VSPLTISW1, 7487 OP_VSPLTISW2, 7488 OP_VSPLTISW3, 7489 OP_VSLDOI4, 7490 OP_VSLDOI8, 7491 OP_VSLDOI12 7492 }; 7493 7494 if (OpNum == OP_COPY) { 7495 if (LHSID == (1*9+2)*9+3) return LHS; 7496 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 7497 return RHS; 7498 } 7499 7500 SDValue OpLHS, OpRHS; 7501 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 7502 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 7503 7504 int ShufIdxs[16]; 7505 switch (OpNum) { 7506 default: llvm_unreachable("Unknown i32 permute!"); 7507 case OP_VMRGHW: 7508 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 7509 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 7510 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 7511 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 7512 break; 7513 case OP_VMRGLW: 7514 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 7515 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 7516 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 7517 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 7518 break; 7519 
case OP_VSPLTISW0: 7520 for (unsigned i = 0; i != 16; ++i) 7521 ShufIdxs[i] = (i&3)+0; 7522 break; 7523 case OP_VSPLTISW1: 7524 for (unsigned i = 0; i != 16; ++i) 7525 ShufIdxs[i] = (i&3)+4; 7526 break; 7527 case OP_VSPLTISW2: 7528 for (unsigned i = 0; i != 16; ++i) 7529 ShufIdxs[i] = (i&3)+8; 7530 break; 7531 case OP_VSPLTISW3: 7532 for (unsigned i = 0; i != 16; ++i) 7533 ShufIdxs[i] = (i&3)+12; 7534 break; 7535 case OP_VSLDOI4: 7536 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 7537 case OP_VSLDOI8: 7538 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 7539 case OP_VSLDOI12: 7540 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 7541 } 7542 EVT VT = OpLHS.getValueType(); 7543 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 7544 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 7545 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 7546 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7547 } 7548 7549 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 7550 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 7551 /// return the code it can be lowered into. Worst case, it can always be 7552 /// lowered into a vperm. 
7553 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 7554 SelectionDAG &DAG) const { 7555 SDLoc dl(Op); 7556 SDValue V1 = Op.getOperand(0); 7557 SDValue V2 = Op.getOperand(1); 7558 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 7559 EVT VT = Op.getValueType(); 7560 bool isLittleEndian = Subtarget.isLittleEndian(); 7561 7562 unsigned ShiftElts, InsertAtByte; 7563 bool Swap; 7564 if (Subtarget.hasP9Vector() && 7565 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, 7566 isLittleEndian)) { 7567 if (Swap) 7568 std::swap(V1, V2); 7569 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 7570 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); 7571 if (ShiftElts) { 7572 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, 7573 DAG.getConstant(ShiftElts, dl, MVT::i32)); 7574 SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl, 7575 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 7576 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 7577 } 7578 SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2, 7579 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 7580 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 7581 } 7582 7583 if (Subtarget.hasVSX()) { 7584 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { 7585 int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); 7586 7587 // If the source for the shuffle is a scalar_to_vector that came from a 7588 // 32-bit load, it will have used LXVWSX so we don't need to splat again. 
7589 if (Subtarget.hasP9Vector() && 7590 ((isLittleEndian && SplatIdx == 3) || 7591 (!isLittleEndian && SplatIdx == 0))) { 7592 SDValue Src = V1.getOperand(0); 7593 if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR && 7594 Src.getOperand(0).getOpcode() == ISD::LOAD && 7595 Src.getOperand(0).hasOneUse()) 7596 return V1; 7597 } 7598 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 7599 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, 7600 DAG.getConstant(SplatIdx, dl, MVT::i32)); 7601 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); 7602 } 7603 7604 // Left shifts of 8 bytes are actually swaps. Convert accordingly. 7605 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { 7606 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); 7607 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); 7608 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); 7609 } 7610 7611 } 7612 7613 if (Subtarget.hasQPX()) { 7614 if (VT.getVectorNumElements() != 4) 7615 return SDValue(); 7616 7617 if (V2.isUndef()) V2 = V1; 7618 7619 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); 7620 if (AlignIdx != -1) { 7621 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, 7622 DAG.getConstant(AlignIdx, dl, MVT::i32)); 7623 } else if (SVOp->isSplat()) { 7624 int SplatIdx = SVOp->getSplatIndex(); 7625 if (SplatIdx >= 4) { 7626 std::swap(V1, V2); 7627 SplatIdx -= 4; 7628 } 7629 7630 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, 7631 DAG.getConstant(SplatIdx, dl, MVT::i32)); 7632 } 7633 7634 // Lower this into a qvgpci/qvfperm pair. 7635 7636 // Compute the qvgpci literal 7637 unsigned idx = 0; 7638 for (unsigned i = 0; i < 4; ++i) { 7639 int m = SVOp->getMaskElt(i); 7640 unsigned mm = m >= 0 ? 
(unsigned) m : i; 7641 idx |= mm << (3-i)*3; 7642 } 7643 7644 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, 7645 DAG.getConstant(idx, dl, MVT::i32)); 7646 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); 7647 } 7648 7649 // Cases that are handled by instructions that take permute immediates 7650 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 7651 // selected by the instruction selector. 7652 if (V2.isUndef()) { 7653 if (PPC::isSplatShuffleMask(SVOp, 1) || 7654 PPC::isSplatShuffleMask(SVOp, 2) || 7655 PPC::isSplatShuffleMask(SVOp, 4) || 7656 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 7657 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 7658 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 7659 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 7660 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 7661 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 7662 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 7663 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 7664 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || 7665 (Subtarget.hasP8Altivec() && ( 7666 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || 7667 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || 7668 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { 7669 return Op; 7670 } 7671 } 7672 7673 // Altivec has a variety of "shuffle immediates" that take two vector inputs 7674 // and produce a fixed permutation. If any of these match, do not lower to 7675 // VPERM. 7676 unsigned int ShuffleKind = isLittleEndian ? 
2 : 0; 7677 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 7678 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 7679 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 7680 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7681 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7682 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7683 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7684 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7685 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7686 (Subtarget.hasP8Altivec() && ( 7687 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || 7688 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || 7689 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) 7690 return Op; 7691 7692 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 7693 // perfect shuffle table to emit an optimal matching sequence. 7694 ArrayRef<int> PermMask = SVOp->getMask(); 7695 7696 unsigned PFIndexes[4]; 7697 bool isFourElementShuffle = true; 7698 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 7699 unsigned EltNo = 8; // Start out undef. 7700 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 7701 if (PermMask[i*4+j] < 0) 7702 continue; // Undef, ignore it. 7703 7704 unsigned ByteSource = PermMask[i*4+j]; 7705 if ((ByteSource & 3) != j) { 7706 isFourElementShuffle = false; 7707 break; 7708 } 7709 7710 if (EltNo == 8) { 7711 EltNo = ByteSource/4; 7712 } else if (EltNo != ByteSource/4) { 7713 isFourElementShuffle = false; 7714 break; 7715 } 7716 } 7717 PFIndexes[i] = EltNo; 7718 } 7719 7720 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 7721 // perfect shuffle vector to determine if it is cost effective to do this as 7722 // discrete instructions, or whether we should use a vperm. 7723 // For now, we skip this for little endian until such time as we have a 7724 // little-endian perfect shuffle table. 
7725 if (isFourElementShuffle && !isLittleEndian) { 7726 // Compute the index in the perfect shuffle table. 7727 unsigned PFTableIndex = 7728 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 7729 7730 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 7731 unsigned Cost = (PFEntry >> 30); 7732 7733 // Determining when to avoid vperm is tricky. Many things affect the cost 7734 // of vperm, particularly how many times the perm mask needs to be computed. 7735 // For example, if the perm mask can be hoisted out of a loop or is already 7736 // used (perhaps because there are multiple permutes with the same shuffle 7737 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 7738 // the loop requires an extra register. 7739 // 7740 // As a compromise, we only emit discrete instructions if the shuffle can be 7741 // generated in 3 or fewer operations. When we have loop information 7742 // available, if this block is within a loop, we should avoid using vperm 7743 // for 3-operation perms and use a constant pool load instead. 7744 if (Cost < 3) 7745 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 7746 } 7747 7748 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 7749 // vector that will get spilled to the constant pool. 7750 if (V2.isUndef()) V2 = V1; 7751 7752 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 7753 // that it is in input element units, not in bytes. Convert now. 7754 7755 // For little endian, the order of the input vectors is reversed, and 7756 // the permutation mask is complemented with respect to 31. This is 7757 // necessary to produce proper semantics with the big-endian-biased vperm 7758 // instruction. 
7759 EVT EltVT = V1.getValueType().getVectorElementType(); 7760 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 7761 7762 SmallVector<SDValue, 16> ResultMask; 7763 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 7764 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 7765 7766 for (unsigned j = 0; j != BytesPerElement; ++j) 7767 if (isLittleEndian) 7768 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), 7769 dl, MVT::i32)); 7770 else 7771 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, 7772 MVT::i32)); 7773 } 7774 7775 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); 7776 if (isLittleEndian) 7777 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7778 V2, V1, VPermMask); 7779 else 7780 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7781 V1, V2, VPermMask); 7782 } 7783 7784 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a 7785 /// vector comparison. If it is, return true and fill in Opc/isDot with 7786 /// information about the intrinsic. 7787 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, 7788 bool &isDot, const PPCSubtarget &Subtarget) { 7789 unsigned IntrinsicID = 7790 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 7791 CompareOpc = -1; 7792 isDot = false; 7793 switch (IntrinsicID) { 7794 default: return false; 7795 // Comparison predicates. 
7796 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 7797 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 7798 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 7799 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 7800 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 7801 case Intrinsic::ppc_altivec_vcmpequd_p: 7802 if (Subtarget.hasP8Altivec()) { 7803 CompareOpc = 199; 7804 isDot = 1; 7805 } else 7806 return false; 7807 7808 break; 7809 case Intrinsic::ppc_altivec_vcmpneb_p: 7810 case Intrinsic::ppc_altivec_vcmpneh_p: 7811 case Intrinsic::ppc_altivec_vcmpnew_p: 7812 case Intrinsic::ppc_altivec_vcmpnezb_p: 7813 case Intrinsic::ppc_altivec_vcmpnezh_p: 7814 case Intrinsic::ppc_altivec_vcmpnezw_p: 7815 if (Subtarget.hasP9Altivec()) { 7816 switch(IntrinsicID) { 7817 default: llvm_unreachable("Unknown comparison intrinsic."); 7818 case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break; 7819 case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break; 7820 case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break; 7821 case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break; 7822 case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break; 7823 case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break; 7824 } 7825 isDot = 1; 7826 } else 7827 return false; 7828 7829 break; 7830 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 7831 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 7832 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 7833 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 7834 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 7835 case Intrinsic::ppc_altivec_vcmpgtsd_p: 7836 if (Subtarget.hasP8Altivec()) { 7837 CompareOpc = 967; 7838 isDot = 1; 7839 } else 7840 
return false; 7841 7842 break; 7843 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 7844 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 7845 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 7846 case Intrinsic::ppc_altivec_vcmpgtud_p: 7847 if (Subtarget.hasP8Altivec()) { 7848 CompareOpc = 711; 7849 isDot = 1; 7850 } else 7851 return false; 7852 7853 break; 7854 // VSX predicate comparisons use the same infrastructure 7855 case Intrinsic::ppc_vsx_xvcmpeqdp_p: 7856 case Intrinsic::ppc_vsx_xvcmpgedp_p: 7857 case Intrinsic::ppc_vsx_xvcmpgtdp_p: 7858 case Intrinsic::ppc_vsx_xvcmpeqsp_p: 7859 case Intrinsic::ppc_vsx_xvcmpgesp_p: 7860 case Intrinsic::ppc_vsx_xvcmpgtsp_p: 7861 if (Subtarget.hasVSX()) { 7862 switch (IntrinsicID) { 7863 case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; 7864 case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; 7865 case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; 7866 case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; 7867 case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; 7868 case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; 7869 } 7870 isDot = 1; 7871 } 7872 else 7873 return false; 7874 7875 break; 7876 7877 // Normal Comparisons. 
7878 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 7879 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 7880 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 7881 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 7882 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 7883 case Intrinsic::ppc_altivec_vcmpequd: 7884 if (Subtarget.hasP8Altivec()) { 7885 CompareOpc = 199; 7886 isDot = 0; 7887 } else 7888 return false; 7889 7890 break; 7891 case Intrinsic::ppc_altivec_vcmpneb: 7892 case Intrinsic::ppc_altivec_vcmpneh: 7893 case Intrinsic::ppc_altivec_vcmpnew: 7894 case Intrinsic::ppc_altivec_vcmpnezb: 7895 case Intrinsic::ppc_altivec_vcmpnezh: 7896 case Intrinsic::ppc_altivec_vcmpnezw: 7897 if (Subtarget.hasP9Altivec()) { 7898 switch (IntrinsicID) { 7899 default: llvm_unreachable("Unknown comparison intrinsic."); 7900 case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break; 7901 case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break; 7902 case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break; 7903 case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break; 7904 case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break; 7905 case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break; 7906 } 7907 isDot = 0; 7908 } else 7909 return false; 7910 break; 7911 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 7912 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 7913 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 7914 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 7915 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 7916 case Intrinsic::ppc_altivec_vcmpgtsd: 7917 if (Subtarget.hasP8Altivec()) { 7918 CompareOpc = 967; 7919 isDot = 0; 7920 } else 7921 return false; 7922 7923 break; 7924 case 
Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 7925 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 7926 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 7927 case Intrinsic::ppc_altivec_vcmpgtud: 7928 if (Subtarget.hasP8Altivec()) { 7929 CompareOpc = 711; 7930 isDot = 0; 7931 } else 7932 return false; 7933 7934 break; 7935 } 7936 return true; 7937 } 7938 7939 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 7940 /// lower, do it, otherwise return null. 7941 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 7942 SelectionDAG &DAG) const { 7943 unsigned IntrinsicID = 7944 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 7945 7946 if (IntrinsicID == Intrinsic::thread_pointer) { 7947 // Reads the thread pointer register, used for __builtin_thread_pointer. 7948 bool is64bit = Subtarget.isPPC64(); 7949 return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 7950 is64bit ? MVT::i64 : MVT::i32); 7951 } 7952 7953 // If this is a lowered altivec predicate compare, CompareOpc is set to the 7954 // opcode number of the comparison. 7955 SDLoc dl(Op); 7956 int CompareOpc; 7957 bool isDot; 7958 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) 7959 return SDValue(); // Don't custom lower most intrinsics. 7960 7961 // If this is a non-dot comparison, make the VCMP node and we are done. 7962 if (!isDot) { 7963 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 7964 Op.getOperand(1), Op.getOperand(2), 7965 DAG.getConstant(CompareOpc, dl, MVT::i32)); 7966 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 7967 } 7968 7969 // Create the PPCISD altivec 'dot' comparison node. 
7970 SDValue Ops[] = { 7971 Op.getOperand(2), // LHS 7972 Op.getOperand(3), // RHS 7973 DAG.getConstant(CompareOpc, dl, MVT::i32) 7974 }; 7975 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 7976 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 7977 7978 // Now that we have the comparison, emit a copy from the CR to a GPR. 7979 // This is flagged to the above dot comparison. 7980 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 7981 DAG.getRegister(PPC::CR6, MVT::i32), 7982 CompNode.getValue(1)); 7983 7984 // Unpack the result based on how the target uses it. 7985 unsigned BitNo; // Bit # of CR6. 7986 bool InvertBit; // Invert result? 7987 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 7988 default: // Can't happen, don't crash on invalid number though. 7989 case 0: // Return the value of the EQ bit of CR6. 7990 BitNo = 0; InvertBit = false; 7991 break; 7992 case 1: // Return the inverted value of the EQ bit of CR6. 7993 BitNo = 0; InvertBit = true; 7994 break; 7995 case 2: // Return the value of the LT bit of CR6. 7996 BitNo = 2; InvertBit = false; 7997 break; 7998 case 3: // Return the inverted value of the LT bit of CR6. 7999 BitNo = 2; InvertBit = true; 8000 break; 8001 } 8002 8003 // Shift the bit into the low position. 8004 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 8005 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); 8006 // Isolate the bit. 8007 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 8008 DAG.getConstant(1, dl, MVT::i32)); 8009 8010 // If we are supposed to, toggle the bit. 
8011 if (InvertBit) 8012 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 8013 DAG.getConstant(1, dl, MVT::i32)); 8014 return Flags; 8015 } 8016 8017 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 8018 SelectionDAG &DAG) const { 8019 SDLoc dl(Op); 8020 // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int 8021 // instructions), but for smaller types, we need to first extend up to v2i32 8022 // before doing going farther. 8023 if (Op.getValueType() == MVT::v2i64) { 8024 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 8025 if (ExtVT != MVT::v2i32) { 8026 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); 8027 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, 8028 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), 8029 ExtVT.getVectorElementType(), 4))); 8030 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); 8031 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, 8032 DAG.getValueType(MVT::v2i32)); 8033 } 8034 8035 return Op; 8036 } 8037 8038 return SDValue(); 8039 } 8040 8041 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 8042 SelectionDAG &DAG) const { 8043 SDLoc dl(Op); 8044 // Create a stack slot that is 16-byte aligned. 8045 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 8046 int FrameIdx = MFI.CreateStackObject(16, 16, false); 8047 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8048 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8049 8050 // Store the input value into Value#0 of the stack slot. 8051 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 8052 MachinePointerInfo()); 8053 // Load it out. 
8054 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo()); 8055 } 8056 8057 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 8058 SelectionDAG &DAG) const { 8059 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && 8060 "Should only be called for ISD::INSERT_VECTOR_ELT"); 8061 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 8062 // We have legal lowering for constant indices but not for variable ones. 8063 if (C) 8064 return Op; 8065 return SDValue(); 8066 } 8067 8068 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 8069 SelectionDAG &DAG) const { 8070 SDLoc dl(Op); 8071 SDNode *N = Op.getNode(); 8072 8073 assert(N->getOperand(0).getValueType() == MVT::v4i1 && 8074 "Unknown extract_vector_elt type"); 8075 8076 SDValue Value = N->getOperand(0); 8077 8078 // The first part of this is like the store lowering except that we don't 8079 // need to track the chain. 8080 8081 // The values are now known to be -1 (false) or 1 (true). To convert this 8082 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 8083 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 8084 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 8085 8086 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 8087 // understand how to form the extending load. 8088 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 8089 8090 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 8091 8092 // Now convert to an integer and store. 
8093 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 8094 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 8095 Value); 8096 8097 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 8098 int FrameIdx = MFI.CreateStackObject(16, 16, false); 8099 MachinePointerInfo PtrInfo = 8100 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 8101 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8102 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8103 8104 SDValue StoreChain = DAG.getEntryNode(); 8105 SDValue Ops[] = {StoreChain, 8106 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 8107 Value, FIdx}; 8108 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 8109 8110 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 8111 dl, VTs, Ops, MVT::v4i32, PtrInfo); 8112 8113 // Extract the value requested. 8114 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 8115 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 8116 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 8117 8118 SDValue IntVal = 8119 DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); 8120 8121 if (!Subtarget.useCRBits()) 8122 return IntVal; 8123 8124 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); 8125 } 8126 8127 /// Lowering for QPX v4i1 loads 8128 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, 8129 SelectionDAG &DAG) const { 8130 SDLoc dl(Op); 8131 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); 8132 SDValue LoadChain = LN->getChain(); 8133 SDValue BasePtr = LN->getBasePtr(); 8134 8135 if (Op.getValueType() == MVT::v4f64 || 8136 Op.getValueType() == MVT::v4f32) { 8137 EVT MemVT = LN->getMemoryVT(); 8138 unsigned Alignment = LN->getAlignment(); 8139 8140 // If this load is properly aligned, then it is legal. 
8141 if (Alignment >= MemVT.getStoreSize()) 8142 return Op; 8143 8144 EVT ScalarVT = Op.getValueType().getScalarType(), 8145 ScalarMemVT = MemVT.getScalarType(); 8146 unsigned Stride = ScalarMemVT.getStoreSize(); 8147 8148 SDValue Vals[4], LoadChains[4]; 8149 for (unsigned Idx = 0; Idx < 4; ++Idx) { 8150 SDValue Load; 8151 if (ScalarVT != ScalarMemVT) 8152 Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, 8153 BasePtr, 8154 LN->getPointerInfo().getWithOffset(Idx * Stride), 8155 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 8156 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 8157 else 8158 Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, 8159 LN->getPointerInfo().getWithOffset(Idx * Stride), 8160 MinAlign(Alignment, Idx * Stride), 8161 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 8162 8163 if (Idx == 0 && LN->isIndexed()) { 8164 assert(LN->getAddressingMode() == ISD::PRE_INC && 8165 "Unknown addressing mode on vector load"); 8166 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), 8167 LN->getAddressingMode()); 8168 } 8169 8170 Vals[Idx] = Load; 8171 LoadChains[Idx] = Load.getValue(1); 8172 8173 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 8174 DAG.getConstant(Stride, dl, 8175 BasePtr.getValueType())); 8176 } 8177 8178 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8179 SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals); 8180 8181 if (LN->isIndexed()) { 8182 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; 8183 return DAG.getMergeValues(RetOps, dl); 8184 } 8185 8186 SDValue RetOps[] = { Value, TF }; 8187 return DAG.getMergeValues(RetOps, dl); 8188 } 8189 8190 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); 8191 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); 8192 8193 // To lower v4i1 from a byte array, we load the byte elements of the 8194 // vector and then reuse the BUILD_VECTOR logic. 
8195 8196 SDValue VectElmts[4], VectElmtChains[4]; 8197 for (unsigned i = 0; i < 4; ++i) { 8198 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 8199 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 8200 8201 VectElmts[i] = DAG.getExtLoad( 8202 ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx, 8203 LN->getPointerInfo().getWithOffset(i), MVT::i8, 8204 /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo()); 8205 VectElmtChains[i] = VectElmts[i].getValue(1); 8206 } 8207 8208 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); 8209 SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts); 8210 8211 SDValue RVals[] = { Value, LoadChain }; 8212 return DAG.getMergeValues(RVals, dl); 8213 } 8214 8215 /// Lowering for QPX v4i1 stores 8216 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, 8217 SelectionDAG &DAG) const { 8218 SDLoc dl(Op); 8219 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode()); 8220 SDValue StoreChain = SN->getChain(); 8221 SDValue BasePtr = SN->getBasePtr(); 8222 SDValue Value = SN->getValue(); 8223 8224 if (Value.getValueType() == MVT::v4f64 || 8225 Value.getValueType() == MVT::v4f32) { 8226 EVT MemVT = SN->getMemoryVT(); 8227 unsigned Alignment = SN->getAlignment(); 8228 8229 // If this store is properly aligned, then it is legal. 
8230 if (Alignment >= MemVT.getStoreSize()) 8231 return Op; 8232 8233 EVT ScalarVT = Value.getValueType().getScalarType(), 8234 ScalarMemVT = MemVT.getScalarType(); 8235 unsigned Stride = ScalarMemVT.getStoreSize(); 8236 8237 SDValue Stores[4]; 8238 for (unsigned Idx = 0; Idx < 4; ++Idx) { 8239 SDValue Ex = DAG.getNode( 8240 ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, 8241 DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); 8242 SDValue Store; 8243 if (ScalarVT != ScalarMemVT) 8244 Store = 8245 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, 8246 SN->getPointerInfo().getWithOffset(Idx * Stride), 8247 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 8248 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 8249 else 8250 Store = DAG.getStore(StoreChain, dl, Ex, BasePtr, 8251 SN->getPointerInfo().getWithOffset(Idx * Stride), 8252 MinAlign(Alignment, Idx * Stride), 8253 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 8254 8255 if (Idx == 0 && SN->isIndexed()) { 8256 assert(SN->getAddressingMode() == ISD::PRE_INC && 8257 "Unknown addressing mode on vector store"); 8258 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), 8259 SN->getAddressingMode()); 8260 } 8261 8262 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 8263 DAG.getConstant(Stride, dl, 8264 BasePtr.getValueType())); 8265 Stores[Idx] = Store; 8266 } 8267 8268 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8269 8270 if (SN->isIndexed()) { 8271 SDValue RetOps[] = { TF, Stores[0].getValue(1) }; 8272 return DAG.getMergeValues(RetOps, dl); 8273 } 8274 8275 return TF; 8276 } 8277 8278 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); 8279 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); 8280 8281 // The values are now known to be -1 (false) or 1 (true). To convert this 8282 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 
8283 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 8284 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 8285 8286 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 8287 // understand how to form the extending load. 8288 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 8289 8290 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 8291 8292 // Now convert to an integer and store. 8293 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 8294 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 8295 Value); 8296 8297 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 8298 int FrameIdx = MFI.CreateStackObject(16, 16, false); 8299 MachinePointerInfo PtrInfo = 8300 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 8301 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8302 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8303 8304 SDValue Ops[] = {StoreChain, 8305 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 8306 Value, FIdx}; 8307 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 8308 8309 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 8310 dl, VTs, Ops, MVT::v4i32, PtrInfo); 8311 8312 // Move data into the byte array. 
8313 SDValue Loads[4], LoadChains[4]; 8314 for (unsigned i = 0; i < 4; ++i) { 8315 unsigned Offset = 4*i; 8316 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 8317 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 8318 8319 Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, 8320 PtrInfo.getWithOffset(Offset)); 8321 LoadChains[i] = Loads[i].getValue(1); 8322 } 8323 8324 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8325 8326 SDValue Stores[4]; 8327 for (unsigned i = 0; i < 4; ++i) { 8328 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 8329 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 8330 8331 Stores[i] = DAG.getTruncStore( 8332 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), 8333 MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), 8334 SN->getAAInfo()); 8335 } 8336 8337 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8338 8339 return StoreChain; 8340 } 8341 8342 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 8343 SDLoc dl(Op); 8344 if (Op.getValueType() == MVT::v4i32) { 8345 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8346 8347 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 8348 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 8349 8350 SDValue RHSSwap = // = vrlw RHS, 16 8351 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 8352 8353 // Shrinkify inputs to v8i16. 8354 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 8355 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 8356 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 8357 8358 // Low parts multiplied together, generating 32-bit results (we ignore the 8359 // top parts). 
8360 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 8361 LHS, RHS, DAG, dl, MVT::v4i32); 8362 8363 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 8364 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 8365 // Shift the high parts up 16 bits. 8366 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 8367 Neg16, DAG, dl); 8368 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 8369 } else if (Op.getValueType() == MVT::v8i16) { 8370 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8371 8372 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 8373 8374 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 8375 LHS, RHS, Zero, DAG, dl); 8376 } else if (Op.getValueType() == MVT::v16i8) { 8377 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8378 bool isLittleEndian = Subtarget.isLittleEndian(); 8379 8380 // Multiply the even 8-bit parts, producing 16-bit sums. 8381 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 8382 LHS, RHS, DAG, dl, MVT::v8i16); 8383 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 8384 8385 // Multiply the odd 8-bit parts, producing 16-bit sums. 8386 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 8387 LHS, RHS, DAG, dl, MVT::v8i16); 8388 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 8389 8390 // Merge the results together. Because vmuleub and vmuloub are 8391 // instructions with a big-endian bias, we must reverse the 8392 // element numbering and reverse the meaning of "odd" and "even" 8393 // when generating little endian code. 
8394 int Ops[16]; 8395 for (unsigned i = 0; i != 8; ++i) { 8396 if (isLittleEndian) { 8397 Ops[i*2 ] = 2*i; 8398 Ops[i*2+1] = 2*i+16; 8399 } else { 8400 Ops[i*2 ] = 2*i+1; 8401 Ops[i*2+1] = 2*i+1+16; 8402 } 8403 } 8404 if (isLittleEndian) 8405 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); 8406 else 8407 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 8408 } else { 8409 llvm_unreachable("Unknown mul to lower!"); 8410 } 8411 } 8412 8413 /// LowerOperation - Provide custom lowering hooks for some operations. 8414 /// 8415 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 8416 switch (Op.getOpcode()) { 8417 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 8418 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 8419 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 8420 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 8421 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 8422 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 8423 case ISD::SETCC: return LowerSETCC(Op, DAG); 8424 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 8425 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 8426 case ISD::VASTART: 8427 return LowerVASTART(Op, DAG); 8428 8429 case ISD::VAARG: 8430 return LowerVAARG(Op, DAG); 8431 8432 case ISD::VACOPY: 8433 return LowerVACOPY(Op, DAG); 8434 8435 case ISD::STACKRESTORE: 8436 return LowerSTACKRESTORE(Op, DAG); 8437 8438 case ISD::DYNAMIC_STACKALLOC: 8439 return LowerDYNAMIC_STACKALLOC(Op, DAG); 8440 8441 case ISD::GET_DYNAMIC_AREA_OFFSET: 8442 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); 8443 8444 case ISD::EH_DWARF_CFA: 8445 return LowerEH_DWARF_CFA(Op, DAG); 8446 8447 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 8448 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 8449 8450 case ISD::LOAD: return LowerLOAD(Op, DAG); 8451 case 
ISD::STORE: return LowerSTORE(Op, DAG); 8452 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); 8453 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 8454 case ISD::FP_TO_UINT: 8455 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 8456 SDLoc(Op)); 8457 case ISD::UINT_TO_FP: 8458 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 8459 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 8460 8461 // Lower 64-bit shifts. 8462 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 8463 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 8464 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 8465 8466 // Vector-related lowering. 8467 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 8468 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 8469 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 8470 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 8471 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 8472 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 8473 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 8474 case ISD::MUL: return LowerMUL(Op, DAG); 8475 8476 // For counter-based loop handling. 8477 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 8478 8479 // Frame & Return address. 
8480 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 8481 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 8482 } 8483 } 8484 8485 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 8486 SmallVectorImpl<SDValue>&Results, 8487 SelectionDAG &DAG) const { 8488 SDLoc dl(N); 8489 switch (N->getOpcode()) { 8490 default: 8491 llvm_unreachable("Do not know how to custom type legalize this operation!"); 8492 case ISD::READCYCLECOUNTER: { 8493 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 8494 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); 8495 8496 Results.push_back(RTB); 8497 Results.push_back(RTB.getValue(1)); 8498 Results.push_back(RTB.getValue(2)); 8499 break; 8500 } 8501 case ISD::INTRINSIC_W_CHAIN: { 8502 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 8503 Intrinsic::ppc_is_decremented_ctr_nonzero) 8504 break; 8505 8506 assert(N->getValueType(0) == MVT::i1 && 8507 "Unexpected result type for CTR decrement intrinsic"); 8508 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), 8509 N->getValueType(0)); 8510 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 8511 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 8512 N->getOperand(1)); 8513 8514 Results.push_back(NewInt); 8515 Results.push_back(NewInt.getValue(1)); 8516 break; 8517 } 8518 case ISD::VAARG: { 8519 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) 8520 return; 8521 8522 EVT VT = N->getValueType(0); 8523 8524 if (VT == MVT::i64) { 8525 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); 8526 8527 Results.push_back(NewNode); 8528 Results.push_back(NewNode.getValue(1)); 8529 } 8530 return; 8531 } 8532 case ISD::FP_ROUND_INREG: { 8533 assert(N->getValueType(0) == MVT::ppcf128); 8534 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 8535 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8536 MVT::f64, N->getOperand(0), 8537 DAG.getIntPtrConstant(0, dl)); 8538 SDValue Hi = 
DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8539 MVT::f64, N->getOperand(0), 8540 DAG.getIntPtrConstant(1, dl)); 8541 8542 // Add the two halves of the long double in round-to-zero mode. 8543 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 8544 8545 // We know the low half is about to be thrown away, so just use something 8546 // convenient. 8547 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 8548 FPreg, FPreg)); 8549 return; 8550 } 8551 case ISD::FP_TO_SINT: 8552 case ISD::FP_TO_UINT: 8553 // LowerFP_TO_INT() can only handle f32 and f64. 8554 if (N->getOperand(0).getValueType() == MVT::ppcf128) 8555 return; 8556 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 8557 return; 8558 } 8559 } 8560 8561 //===----------------------------------------------------------------------===// 8562 // Other Lowering Code 8563 //===----------------------------------------------------------------------===// 8564 8565 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { 8566 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 8567 Function *Func = Intrinsic::getDeclaration(M, Id); 8568 return Builder.CreateCall(Func, {}); 8569 } 8570 8571 // The mappings for emitLeading/TrailingFence is taken from 8572 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html 8573 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 8574 AtomicOrdering Ord, bool IsStore, 8575 bool IsLoad) const { 8576 if (Ord == AtomicOrdering::SequentiallyConsistent) 8577 return callIntrinsic(Builder, Intrinsic::ppc_sync); 8578 if (isReleaseOrStronger(Ord)) 8579 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 8580 return nullptr; 8581 } 8582 8583 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 8584 AtomicOrdering Ord, bool IsStore, 8585 bool IsLoad) const { 8586 if (IsLoad && isAcquireOrStronger(Ord)) 8587 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 8588 // FIXME: this is too 
conservative, a dependent branch + isync is enough. 8589 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and 8590 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html 8591 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. 8592 return nullptr; 8593 } 8594 8595 MachineBasicBlock * 8596 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, 8597 unsigned AtomicSize, 8598 unsigned BinOpcode, 8599 unsigned CmpOpcode, 8600 unsigned CmpPred) const { 8601 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 8602 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8603 8604 auto LoadMnemonic = PPC::LDARX; 8605 auto StoreMnemonic = PPC::STDCX; 8606 switch (AtomicSize) { 8607 default: 8608 llvm_unreachable("Unexpected size of atomic entity"); 8609 case 1: 8610 LoadMnemonic = PPC::LBARX; 8611 StoreMnemonic = PPC::STBCX; 8612 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 8613 break; 8614 case 2: 8615 LoadMnemonic = PPC::LHARX; 8616 StoreMnemonic = PPC::STHCX; 8617 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 8618 break; 8619 case 4: 8620 LoadMnemonic = PPC::LWARX; 8621 StoreMnemonic = PPC::STWCX; 8622 break; 8623 case 8: 8624 LoadMnemonic = PPC::LDARX; 8625 StoreMnemonic = PPC::STDCX; 8626 break; 8627 } 8628 8629 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8630 MachineFunction *F = BB->getParent(); 8631 MachineFunction::iterator It = ++BB->getIterator(); 8632 8633 unsigned dest = MI.getOperand(0).getReg(); 8634 unsigned ptrA = MI.getOperand(1).getReg(); 8635 unsigned ptrB = MI.getOperand(2).getReg(); 8636 unsigned incr = MI.getOperand(3).getReg(); 8637 DebugLoc dl = MI.getDebugLoc(); 8638 8639 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 8640 MachineBasicBlock *loop2MBB = 8641 CmpOpcode ? 
F->CreateMachineBasicBlock(LLVM_BB) : nullptr; 8642 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8643 F->insert(It, loopMBB); 8644 if (CmpOpcode) 8645 F->insert(It, loop2MBB); 8646 F->insert(It, exitMBB); 8647 exitMBB->splice(exitMBB->begin(), BB, 8648 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8649 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8650 8651 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8652 unsigned TmpReg = (!BinOpcode) ? incr : 8653 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass 8654 : &PPC::GPRCRegClass); 8655 8656 // thisMBB: 8657 // ... 8658 // fallthrough --> loopMBB 8659 BB->addSuccessor(loopMBB); 8660 8661 // loopMBB: 8662 // l[wd]arx dest, ptr 8663 // add r0, dest, incr 8664 // st[wd]cx. r0, ptr 8665 // bne- loopMBB 8666 // fallthrough --> exitMBB 8667 8668 // For max/min... 8669 // loopMBB: 8670 // l[wd]arx dest, ptr 8671 // cmpl?[wd] incr, dest 8672 // bgt exitMBB 8673 // loop2MBB: 8674 // st[wd]cx. dest, ptr 8675 // bne- loopMBB 8676 // fallthrough --> exitMBB 8677 8678 BB = loopMBB; 8679 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 8680 .addReg(ptrA).addReg(ptrB); 8681 if (BinOpcode) 8682 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 8683 if (CmpOpcode) { 8684 // Signed comparisons of byte or halfword values must be sign-extended. 8685 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { 8686 unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 8687 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? 
/// Expand an 8-bit or 16-bit atomic read-modify-write pseudo instruction.
///
/// When the subtarget has part-word atomics this simply forwards to
/// EmitAtomicBinary with a size of 1 or 2. Otherwise the operation is
/// emulated with a word-sized lwarx/stwcx. loop on the enclosing aligned
/// word: the operand and a mask are shifted to the position of the byte or
/// halfword, and only the masked bits of the word are replaced.
///
/// \param MI        The pseudo; operand 0 is the destination register,
///                  operands 1 and 2 form the address, operand 3 is the
///                  increment/new value.
/// \param BB        Block containing \p MI. Everything after \p MI is moved
///                  into the newly created exit block.
/// \param is8bit    True for a byte operation, false for a halfword.
/// \param BinOpcode Opcode combining the loaded word with the shifted
///                  increment; 0 means the shifted increment itself is
///                  stored (ATOMIC_SWAP).
/// \param CmpOpcode If non-zero, a compare opcode: the store is skipped
///                  when the branch on \p CmpPred is taken.
/// \param CmpPred   Predicate of the branch that leaves the loop.
/// \returns The exit block, where the caller continues inserting code.
MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit, // operation
                                            unsigned BinOpcode,
                                            unsigned CmpOpcode,
                                            unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
                            CmpOpcode, CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // Create the loop block, the optional second loop block (only when a
  // compare is requested) and the exit block, and place them after BB.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // Everything after MI, and BB's successor edges, move to the exit block.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // NOTE(review): every temporary below is created in RC, which is G8RC in
  // 64-bit mode, while several of the opcodes that define them (RLWINM, SLW,
  // LI, ORI, LWARX, SRW, EXTSB/EXTSH) are the 32-bit forms -- confirm the
  // register classes are acceptable for those instructions.
  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                          : &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  // On little endian no xori is emitted, so shift and shift1 are the same
  // register.
  unsigned ShiftReg =
    isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  // Without a binary operation the shifted increment is the value to merge.
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  // (Bracketed operands are the halfword variants.)
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]            (big endian only)
  //   rlwinm ptr, ptr1, 0, 0, 29             (rldicr ptr, ptr1, 0, 61 on 64-bit)
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  // shift1 = bit offset (address * 8) of the byte/halfword within its word.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  // ptr = ptr1 with its two low bits cleared, i.e. the aligned word address.
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  // mask2 = 0xFF for bytes, 0xFFFF for halfwords (built as li 0 + ori).
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  // tmp2 = bits of the loaded word outside the mask (left untouched);
  // tmp3 = bits of the new value inside the mask.
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    unsigned SReg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
      .addReg(TmpDestReg).addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      // Bring the loaded field down to bit 0, sign-extend it, and compare
      // against the original (unshifted) increment.
      ValueReg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
        .addReg(SReg).addReg(ShiftReg);
      unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
        .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
      .addReg(CmpReg).addReg(ValueReg);
    // Leave the loop without storing when the predicate holds.
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  // Merge the new field into the untouched bits and try to store the word;
  // retry from the load if the conditional store did not succeed.
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // dest = loaded word shifted right so the accessed field starts at bit 0.
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}
/// Expand the EH SjLj setjmp pseudo instruction \p MI.
///
/// Operand 0 of \p MI is the i32 result register and operand 1 the register
/// holding the jump-buffer address. \p MBB is split after \p MI into
/// thisMBB, mainMBB and sinkMBB; the result is 0 when control arrives
/// through mainMBB and 1 when it arrives through the remainder of thisMBB.
/// The jump buffer is laid out in pointer-sized slots; this function writes
/// slot 1 (return label), slot 3 (TOC pointer, 64-bit SVR4 only) and
/// slot 4 (base pointer).
///
/// \returns sinkMBB, which receives everything that followed \p MI; \p MI
/// itself is erased.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  // Memory Reference: the pseudo's memory operands are copied onto every
  // store into the jump buffer emitted below.
  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();

  unsigned DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(RC->hasType(MVT::i32) && "Invalid destination!");
  // One result register per incoming path; they are merged by a PHI in
  // sinkMBB.
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  buf[TOCOffset] = r2        (64-bit SVR4 only)
  //  buf[BPOffset] = base pointer
  //  bl mainMBB
  //  v_restore = 1
  //  SjLjSetup mainMBB
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  // Byte offsets of the slots written here (slot index * pointer size).
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // LabelReg will receive the link register contents in mainMBB; BufReg is
  // the address of the jump buffer.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI.getOperand(1).getReg();

  // Save the TOC pointer (X2) on 64-bit SVR4.
  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
            .addReg(PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  // Save the base pointer.
  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg);
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Setup: branch-and-link to mainMBB; the attached register mask is the
  // "no preserved registers" mask.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  MIB.addRegMask(TRI->getNoPreservedMask());

  // Result on the path that continues in thisMBB after the bl.
  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  LabelReg = LR
  //  buf[LabelOffset] = LabelReg
  //  mainDstReg = 0
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }

  MIB.setMemRefs(MMOBegin, MMOEnd);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB: merge the two result values.
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
PPC::MFLR8 : PPC::MFLR), LabelReg); 9000 9001 // Store IP 9002 if (Subtarget.isPPC64()) { 9003 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 9004 .addReg(LabelReg) 9005 .addImm(LabelOffset) 9006 .addReg(BufReg); 9007 } else { 9008 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 9009 .addReg(LabelReg) 9010 .addImm(LabelOffset) 9011 .addReg(BufReg); 9012 } 9013 9014 MIB.setMemRefs(MMOBegin, MMOEnd); 9015 9016 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 9017 mainMBB->addSuccessor(sinkMBB); 9018 9019 // sinkMBB: 9020 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 9021 TII->get(PPC::PHI), DstReg) 9022 .addReg(mainDstReg).addMBB(mainMBB) 9023 .addReg(restoreDstReg).addMBB(thisMBB); 9024 9025 MI.eraseFromParent(); 9026 return sinkMBB; 9027 } 9028 9029 MachineBasicBlock * 9030 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, 9031 MachineBasicBlock *MBB) const { 9032 DebugLoc DL = MI.getDebugLoc(); 9033 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 9034 9035 MachineFunction *MF = MBB->getParent(); 9036 MachineRegisterInfo &MRI = MF->getRegInfo(); 9037 9038 // Memory Reference 9039 MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); 9040 MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); 9041 9042 MVT PVT = getPointerTy(MF->getDataLayout()); 9043 assert((PVT == MVT::i64 || PVT == MVT::i32) && 9044 "Invalid Pointer Size!"); 9045 9046 const TargetRegisterClass *RC = 9047 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 9048 unsigned Tmp = MRI.createVirtualRegister(RC); 9049 // Since FP is only updated here but NOT referenced, it's treated as GPR. 9050 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 9051 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 9052 unsigned BP = 9053 (PVT == MVT::i64) 9054 ? PPC::X30 9055 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? 
PPC::R29 9056 : PPC::R30); 9057 9058 MachineInstrBuilder MIB; 9059 9060 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 9061 const int64_t SPOffset = 2 * PVT.getStoreSize(); 9062 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 9063 const int64_t BPOffset = 4 * PVT.getStoreSize(); 9064 9065 unsigned BufReg = MI.getOperand(0).getReg(); 9066 9067 // Reload FP (the jumped-to function may not have had a 9068 // frame pointer, and if so, then its r31 will be restored 9069 // as necessary). 9070 if (PVT == MVT::i64) { 9071 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 9072 .addImm(0) 9073 .addReg(BufReg); 9074 } else { 9075 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 9076 .addImm(0) 9077 .addReg(BufReg); 9078 } 9079 MIB.setMemRefs(MMOBegin, MMOEnd); 9080 9081 // Reload IP 9082 if (PVT == MVT::i64) { 9083 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 9084 .addImm(LabelOffset) 9085 .addReg(BufReg); 9086 } else { 9087 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 9088 .addImm(LabelOffset) 9089 .addReg(BufReg); 9090 } 9091 MIB.setMemRefs(MMOBegin, MMOEnd); 9092 9093 // Reload SP 9094 if (PVT == MVT::i64) { 9095 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 9096 .addImm(SPOffset) 9097 .addReg(BufReg); 9098 } else { 9099 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 9100 .addImm(SPOffset) 9101 .addReg(BufReg); 9102 } 9103 MIB.setMemRefs(MMOBegin, MMOEnd); 9104 9105 // Reload BP 9106 if (PVT == MVT::i64) { 9107 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 9108 .addImm(BPOffset) 9109 .addReg(BufReg); 9110 } else { 9111 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 9112 .addImm(BPOffset) 9113 .addReg(BufReg); 9114 } 9115 MIB.setMemRefs(MMOBegin, MMOEnd); 9116 9117 // Reload TOC 9118 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 9119 setUsesTOCBasePtr(*MBB->getParent()); 9120 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 9121 .addImm(TOCOffset) 9122 .addReg(BufReg); 9123 9124 MIB.setMemRefs(MMOBegin, MMOEnd); 9125 } 
9126 9127 // Jump 9128 BuildMI(*MBB, MI, DL, 9129 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 9130 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 9131 9132 MI.eraseFromParent(); 9133 return MBB; 9134 } 9135 9136 MachineBasicBlock * 9137 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 9138 MachineBasicBlock *BB) const { 9139 if (MI.getOpcode() == TargetOpcode::STACKMAP || 9140 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 9141 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && 9142 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 9143 // Call lowering should have added an r2 operand to indicate a dependence 9144 // on the TOC base pointer value. It can't however, because there is no 9145 // way to mark the dependence as implicit there, and so the stackmap code 9146 // will confuse it with a regular operand. Instead, add the dependence 9147 // here. 9148 setUsesTOCBasePtr(*BB->getParent()); 9149 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); 9150 } 9151 9152 return emitPatchPoint(MI, BB); 9153 } 9154 9155 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || 9156 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { 9157 return emitEHSjLjSetJmp(MI, BB); 9158 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || 9159 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { 9160 return emitEHSjLjLongJmp(MI, BB); 9161 } 9162 9163 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 9164 9165 // To "insert" these instructions we actually have to insert their 9166 // control-flow patterns. 
const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // Insertion point for any new blocks: immediately after BB.
  MachineFunction::iterator It = ++BB->getIterator();

  MachineFunction *F = BB->getParent();

  if (Subtarget.hasISEL() &&
      (MI.getOpcode() == PPC::SELECT_CC_I4 ||
       MI.getOpcode() == PPC::SELECT_CC_I8 ||
       MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
    // Integer selects on subtargets with ISEL need no new control flow: build
    // the two-entry condition (predicate, condition register operand) and let
    // the instruction info emit the select.
    SmallVector<MachineOperand, 2> Cond;
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
        MI.getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI.getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI.getOperand(1));

    DebugLoc dl = MI.getDebugLoc();
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
  } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
             MI.getOpcode() == PPC::SELECT_CC_I8 ||
             MI.getOpcode() == PPC::SELECT_CC_F4 ||
             MI.getOpcode() == PPC::SELECT_CC_F8 ||
             MI.getOpcode() == PPC::SELECT_CC_QFRC ||
             MI.getOpcode() == PPC::SELECT_CC_QSRC ||
             MI.getOpcode() == PPC::SELECT_CC_QBRC ||
             MI.getOpcode() == PPC::SELECT_CC_VRRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSRC ||
             MI.getOpcode() == PPC::SELECT_I4 ||
             MI.getOpcode() == PPC::SELECT_I8 ||
             MI.getOpcode() == PPC::SELECT_F4 ||
             MI.getOpcode() == PPC::SELECT_F8 ||
             MI.getOpcode() == PPC::SELECT_QFRC ||
             MI.getOpcode() == PPC::SELECT_QSRC ||
             MI.getOpcode() == PPC::SELECT_QBRC ||
             MI.getOpcode() == PPC::SELECT_VRRC ||
             MI.getOpcode() == PPC::SELECT_VSFRC ||
             MI.getOpcode() == PPC::SELECT_VSSRC ||
             MI.getOpcode() == PPC::SELECT_VSRC) {
    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    // thisMBB:
    // ...
    //  TrueVal = ...
    //  cmpTY ccX, r1, r2
    //  bCC sinkMBB
    //  fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    // The plain SELECT_* pseudos branch on the condition in operand 1 (BC);
    // the SELECT_CC_* pseudos additionally carry a predicate immediate in
    // operand 4 (BCC). Either way the taken edge goes straight to sinkMBB.
    if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
        MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
        MI.getOpcode() == PPC::SELECT_QFRC ||
        MI.getOpcode() == PPC::SELECT_QSRC ||
        MI.getOpcode() == PPC::SELECT_QBRC ||
        MI.getOpcode() == PPC::SELECT_VRRC ||
        MI.getOpcode() == PPC::SELECT_VSFRC ||
        MI.getOpcode() == PPC::SELECT_VSSRC ||
        MI.getOpcode() == PPC::SELECT_VSRC) {
      BuildMI(BB, dl, TII->get(PPC::BC))
        .addReg(MI.getOperand(1).getReg())
        .addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI.getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(SelectPred)
        .addReg(MI.getOperand(1).getReg())
        .addMBB(sinkMBB);
    }

    // copy0MBB:
    //  %FalseValue = ...
    //  # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    // sinkMBB:
    //  %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
        .addReg(MI.getOperand(3).getReg())
        .addMBB(copy0MBB)
        .addReg(MI.getOperand(2).getReg())
        .addMBB(thisMBB);
  } else if (MI.getOpcode() == PPC::ReadTB) {
    // To read the 64-bit time-base register on a 32-bit target, we read the
    // two halves. Should the counter have wrapped while it was being read, we
    // need to try again.
    // ...
    // readLoop:
    // mfspr Rx,TBU # load from TBU
    // mfspr Ry,TB  # load from TB
    // mfspr Rz,TBU # load from TBU
    // cmpw crX,Rx,Rz # check if 'old'='new'
    // bne readLoop   # branch if they're not equal
    // ...

    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, readMBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(readMBB);
    BB = readMBB;

    // Operand 0 receives the low half, operand 1 the high half.
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    unsigned LoReg = MI.getOperand(0).getReg();
    unsigned HiReg = MI.getOperand(1).getReg();

    // SPR 269 is read first and last (upper half), SPR 268 in between (lower
    // half), matching the TBU/TB/TBU sequence sketched above.
    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);

    unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

    // Retry while the two upper-half reads disagree.
    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
      .addReg(HiReg).addReg(ReadAgainReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);

    BB->addSuccessor(readMBB);
    BB->addSuccessor(sinkMBB);
  }
  // Atomic read-modify-write pseudos. The I8/I16 forms are forwarded to
  // EmitPartwordAtomicBinary (third argument true for I8, false for I16); the
  // I32/I64 forms go to EmitAtomicBinary with the access size in bytes. The
  // next argument is the ALU opcode to apply; the min/max/swap forms pass 0
  // there, and min/max also pass a compare opcode and predicate.
  // NOTE(review): the meaning of a 0 opcode and of the compare/predicate pair
  // is defined by those helpers, which are outside this excerpt - confirm.
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);

  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0);

  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
    // Compare-and-swap with a load-reserve/store-conditional pair of exactly
    // the access width. The I8/I16 pseudos only reach this branch when the
    // subtarget has partword atomics; otherwise they are handled by the
    // word-sized emulation in the next branch.
    // Only the I64 pseudo uses the doubleword compare.
    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    auto LoadMnemonic = PPC::LDARX;
    auto StoreMnemonic = PPC::STDCX;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Compare and swap of unknown size");
    case PPC::ATOMIC_CMP_SWAP_I8:
      LoadMnemonic = PPC::LBARX;
      StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I16:
      LoadMnemonic = PPC::LHARX;
      StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I32:
      LoadMnemonic = PPC::LWARX;
      StoreMnemonic = PPC::STWCX;
      break;
    case PPC::ATOMIC_CMP_SWAP_I64:
      LoadMnemonic = PPC::LDARX;
      StoreMnemonic = PPC::STDCX;
      break;
    }
    // Operands: 0 = result, 1/2 = the two address registers, 3 = expected
    // value, 4 = replacement value.
    unsigned dest = MI.getOperand(0).getReg();
    unsigned ptrA = MI.getOperand(1).getReg();
    unsigned ptrB = MI.getOperand(2).getReg();
    unsigned oldval = MI.getOperand(3).getReg();
    unsigned newval = MI.getOperand(4).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    // Everything after MI, and BB's successor edges, move to exitMBB.
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loop1MBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[bhwd]arx dest, ptr
    //   cmp[wd] oldval, dest
    //   bne- midMBB
    // loop2MBB:
    //   st[bhwd]cx. newval, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   st[bhwd]cx. dest, ptr
    // exitBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
      .addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
      .addReg(oldval).addReg(dest);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    // Comparison failed: store the value that was just loaded back with the
    // conditional store before leaving.
    BB = midMBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // Byte/halfword compare-and-swap built from word-sized lwarx/stwcx. on
    // the aligned word that contains the value.
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = Subtarget.isPPC64();
    bool isLittleEndian = Subtarget.isLittleEndian();
    bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    unsigned dest = MI.getOperand(0).getReg();
    unsigned ptrA = MI.getOperand(1).getReg();
    unsigned ptrB = MI.getOperand(2).getReg();
    unsigned oldval = MI.getOperand(3).getReg();
    unsigned newval = MI.getOperand(4).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                            : &PPC::GPRCRegClass;
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
    // On little-endian the shift amount is used without the xori adjustment,
    // so no separate register is needed for it.
    unsigned ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
    unsigned Ptr1Reg;
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    //  thisMBB:
    //   ...
    //   fallthrough --> loop1MBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval,shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitBB:
    //   srw dest, tmp, shift
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
    if (!isLittleEndian)
      BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
          .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval).addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg).addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg).addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
        .addReg(TmpReg).addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
        .addReg(Tmp2Reg).addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    // The masked field (TmpReg) is shifted back down into dest at the very
    // top of exitMBB, ahead of the instructions spliced in from the original
    // block.
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
      .addReg(ShiftReg);
  } else if (MI.getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
    unsigned Dest = MI.getOperand(0).getReg();
    unsigned Src1 = MI.getOperand(1).getReg();
    unsigned Src2 = MI.getOperand(2).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);

    // Perform addition.
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
  } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
             MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
    // The *_BIT8 pseudos use the 64-bit record-form AND-immediate.
    unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
                       MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
PPC::ANDIo8 9692 : PPC::ANDIo; 9693 bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || 9694 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); 9695 9696 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9697 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 9698 &PPC::GPRCRegClass : 9699 &PPC::G8RCRegClass); 9700 9701 DebugLoc dl = MI.getDebugLoc(); 9702 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 9703 .addReg(MI.getOperand(1).getReg()) 9704 .addImm(1); 9705 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 9706 MI.getOperand(0).getReg()) 9707 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 9708 } else if (MI.getOpcode() == PPC::TCHECK_RET) { 9709 DebugLoc Dl = MI.getDebugLoc(); 9710 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9711 unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 9712 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); 9713 return BB; 9714 } else { 9715 llvm_unreachable("Unexpected instr type to insert"); 9716 } 9717 9718 MI.eraseFromParent(); // The pseudo instruction is gone now. 9719 return BB; 9720 } 9721 9722 //===----------------------------------------------------------------------===// 9723 // Target Optimization Hooks 9724 //===----------------------------------------------------------------------===// 9725 9726 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { 9727 // For the estimates, convergence is quadratic, so we essentially double the 9728 // number of digits correct after every iteration. For both FRE and FRSQRTE, 9729 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), 9730 // this is 2^-14. IEEE float has 23 digits and double has 52 digits. 9731 int RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3; 9732 if (VT.getScalarType() == MVT::f64) 9733 RefinementSteps++; 9734 return RefinementSteps; 9735 } 9736 9737 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, 9738 int Enabled, int &RefinementSteps, 9739 bool &UseOneConstNR, 9740 bool Reciprocal) const { 9741 EVT VT = Operand.getValueType(); 9742 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 9743 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 9744 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 9745 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 9746 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 9747 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 9748 if (RefinementSteps == ReciprocalEstimate::Unspecified) 9749 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 9750 9751 UseOneConstNR = true; 9752 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); 9753 } 9754 return SDValue(); 9755 } 9756 9757 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, 9758 int Enabled, 9759 int &RefinementSteps) const { 9760 EVT VT = Operand.getValueType(); 9761 if ((VT == MVT::f32 && Subtarget.hasFRES()) || 9762 (VT == MVT::f64 && Subtarget.hasFRE()) || 9763 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 9764 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 9765 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 9766 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 9767 if (RefinementSteps == ReciprocalEstimate::Unspecified) 9768 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 9769 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); 9770 } 9771 return SDValue(); 9772 } 9773 9774 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { 9775 // Note: This functionality is used only when unsafe-fp-math is enabled, and 9776 // on cores with reciprocal estimates (which are used when unsafe-fp-math is 9777 // enabled for division), this functionality is redundant with the default 9778 // combiner logic (once the division -> reciprocal/multiply 
transformation 9779 // has taken place). As a result, this matters more for older cores than for 9780 // newer ones. 9781 9782 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 9783 // reciprocal if there are two or more FDIVs (for embedded cores with only 9784 // one FP pipeline) for three or more FDIVs (for generic OOO cores). 9785 switch (Subtarget.getDarwinDirective()) { 9786 default: 9787 return 3; 9788 case PPC::DIR_440: 9789 case PPC::DIR_A2: 9790 case PPC::DIR_E500mc: 9791 case PPC::DIR_E5500: 9792 return 2; 9793 } 9794 } 9795 9796 // isConsecutiveLSLoc needs to work even if all adds have not yet been 9797 // collapsed, and so we need to look through chains of them. 9798 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, 9799 int64_t& Offset, SelectionDAG &DAG) { 9800 if (DAG.isBaseWithConstantOffset(Loc)) { 9801 Base = Loc.getOperand(0); 9802 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); 9803 9804 // The base might itself be a base plus an offset, and if so, accumulate 9805 // that as well. 
9806 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); 9807 } 9808 } 9809 9810 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 9811 unsigned Bytes, int Dist, 9812 SelectionDAG &DAG) { 9813 if (VT.getSizeInBits() / 8 != Bytes) 9814 return false; 9815 9816 SDValue BaseLoc = Base->getBasePtr(); 9817 if (Loc.getOpcode() == ISD::FrameIndex) { 9818 if (BaseLoc.getOpcode() != ISD::FrameIndex) 9819 return false; 9820 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 9821 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 9822 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 9823 int FS = MFI.getObjectSize(FI); 9824 int BFS = MFI.getObjectSize(BFI); 9825 if (FS != BFS || FS != (int)Bytes) return false; 9826 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); 9827 } 9828 9829 SDValue Base1 = Loc, Base2 = BaseLoc; 9830 int64_t Offset1 = 0, Offset2 = 0; 9831 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); 9832 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); 9833 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) 9834 return true; 9835 9836 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9837 const GlobalValue *GV1 = nullptr; 9838 const GlobalValue *GV2 = nullptr; 9839 Offset1 = 0; 9840 Offset2 = 0; 9841 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 9842 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 9843 if (isGA1 && isGA2 && GV1 == GV2) 9844 return Offset1 == (Offset2 + Dist*Bytes); 9845 return false; 9846 } 9847 9848 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 9849 // not enforce equality of the chain operands. 
9850 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, 9851 unsigned Bytes, int Dist, 9852 SelectionDAG &DAG) { 9853 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { 9854 EVT VT = LS->getMemoryVT(); 9855 SDValue Loc = LS->getBasePtr(); 9856 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); 9857 } 9858 9859 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 9860 EVT VT; 9861 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9862 default: return false; 9863 case Intrinsic::ppc_qpx_qvlfd: 9864 case Intrinsic::ppc_qpx_qvlfda: 9865 VT = MVT::v4f64; 9866 break; 9867 case Intrinsic::ppc_qpx_qvlfs: 9868 case Intrinsic::ppc_qpx_qvlfsa: 9869 VT = MVT::v4f32; 9870 break; 9871 case Intrinsic::ppc_qpx_qvlfcd: 9872 case Intrinsic::ppc_qpx_qvlfcda: 9873 VT = MVT::v2f64; 9874 break; 9875 case Intrinsic::ppc_qpx_qvlfcs: 9876 case Intrinsic::ppc_qpx_qvlfcsa: 9877 VT = MVT::v2f32; 9878 break; 9879 case Intrinsic::ppc_qpx_qvlfiwa: 9880 case Intrinsic::ppc_qpx_qvlfiwz: 9881 case Intrinsic::ppc_altivec_lvx: 9882 case Intrinsic::ppc_altivec_lvxl: 9883 case Intrinsic::ppc_vsx_lxvw4x: 9884 case Intrinsic::ppc_vsx_lxvw4x_be: 9885 VT = MVT::v4i32; 9886 break; 9887 case Intrinsic::ppc_vsx_lxvd2x: 9888 case Intrinsic::ppc_vsx_lxvd2x_be: 9889 VT = MVT::v2f64; 9890 break; 9891 case Intrinsic::ppc_altivec_lvebx: 9892 VT = MVT::i8; 9893 break; 9894 case Intrinsic::ppc_altivec_lvehx: 9895 VT = MVT::i16; 9896 break; 9897 case Intrinsic::ppc_altivec_lvewx: 9898 VT = MVT::i32; 9899 break; 9900 } 9901 9902 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); 9903 } 9904 9905 if (N->getOpcode() == ISD::INTRINSIC_VOID) { 9906 EVT VT; 9907 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9908 default: return false; 9909 case Intrinsic::ppc_qpx_qvstfd: 9910 case Intrinsic::ppc_qpx_qvstfda: 9911 VT = MVT::v4f64; 9912 break; 9913 case Intrinsic::ppc_qpx_qvstfs: 9914 case Intrinsic::ppc_qpx_qvstfsa: 9915 VT = MVT::v4f32; 9916 break; 9917 
case Intrinsic::ppc_qpx_qvstfcd: 9918 case Intrinsic::ppc_qpx_qvstfcda: 9919 VT = MVT::v2f64; 9920 break; 9921 case Intrinsic::ppc_qpx_qvstfcs: 9922 case Intrinsic::ppc_qpx_qvstfcsa: 9923 VT = MVT::v2f32; 9924 break; 9925 case Intrinsic::ppc_qpx_qvstfiw: 9926 case Intrinsic::ppc_qpx_qvstfiwa: 9927 case Intrinsic::ppc_altivec_stvx: 9928 case Intrinsic::ppc_altivec_stvxl: 9929 case Intrinsic::ppc_vsx_stxvw4x: 9930 VT = MVT::v4i32; 9931 break; 9932 case Intrinsic::ppc_vsx_stxvd2x: 9933 VT = MVT::v2f64; 9934 break; 9935 case Intrinsic::ppc_vsx_stxvw4x_be: 9936 VT = MVT::v4i32; 9937 break; 9938 case Intrinsic::ppc_vsx_stxvd2x_be: 9939 VT = MVT::v2f64; 9940 break; 9941 case Intrinsic::ppc_altivec_stvebx: 9942 VT = MVT::i8; 9943 break; 9944 case Intrinsic::ppc_altivec_stvehx: 9945 VT = MVT::i16; 9946 break; 9947 case Intrinsic::ppc_altivec_stvewx: 9948 VT = MVT::i32; 9949 break; 9950 } 9951 9952 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); 9953 } 9954 9955 return false; 9956 } 9957 9958 // Return true is there is a nearyby consecutive load to the one provided 9959 // (regardless of alignment). We search up and down the chain, looking though 9960 // token factors and other loads (but nothing else). As a result, a true result 9961 // indicates that it is safe to create a new consecutive load adjacent to the 9962 // load provided. 9963 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 9964 SDValue Chain = LD->getChain(); 9965 EVT VT = LD->getMemoryVT(); 9966 9967 SmallSet<SDNode *, 16> LoadRoots; 9968 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 9969 SmallSet<SDNode *, 16> Visited; 9970 9971 // First, search up the chain, branching to follow all token-factor operands. 9972 // If we find a consecutive load, then we're done, otherwise, record all 9973 // nodes just above the top-level loads and token factors. 
9974 while (!Queue.empty()) { 9975 SDNode *ChainNext = Queue.pop_back_val(); 9976 if (!Visited.insert(ChainNext).second) 9977 continue; 9978 9979 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { 9980 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 9981 return true; 9982 9983 if (!Visited.count(ChainLD->getChain().getNode())) 9984 Queue.push_back(ChainLD->getChain().getNode()); 9985 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 9986 for (const SDUse &O : ChainNext->ops()) 9987 if (!Visited.count(O.getNode())) 9988 Queue.push_back(O.getNode()); 9989 } else 9990 LoadRoots.insert(ChainNext); 9991 } 9992 9993 // Second, search down the chain, starting from the top-level nodes recorded 9994 // in the first phase. These top-level nodes are the nodes just above all 9995 // loads and token factors. Starting with their uses, recursively look though 9996 // all loads (just the chain uses) and token factors to find a consecutive 9997 // load. 9998 Visited.clear(); 9999 Queue.clear(); 10000 10001 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 10002 IE = LoadRoots.end(); I != IE; ++I) { 10003 Queue.push_back(*I); 10004 10005 while (!Queue.empty()) { 10006 SDNode *LoadRoot = Queue.pop_back_val(); 10007 if (!Visited.insert(LoadRoot).second) 10008 continue; 10009 10010 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) 10011 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 10012 return true; 10013 10014 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 10015 UE = LoadRoot->use_end(); UI != UE; ++UI) 10016 if (((isa<MemSDNode>(*UI) && 10017 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || 10018 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 10019 Queue.push_back(*UI); 10020 } 10021 } 10022 10023 return false; 10024 } 10025 10026 10027 /// This function is called when we have proved that a SETCC node can be replaced 10028 /// by subtraction (and other supporting instructions) so that the 
result of 10029 /// comparison is kept in a GPR instead of CR. This function is purely for 10030 /// codegen purposes and has some flags to guide the codegen process. 10031 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, 10032 bool Swap, SDLoc &DL, SelectionDAG &DAG) { 10033 10034 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); 10035 10036 // Zero extend the operands to the largest legal integer. Originally, they 10037 // must be of a strictly smaller size. 10038 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0), 10039 DAG.getConstant(Size, DL, MVT::i32)); 10040 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1), 10041 DAG.getConstant(Size, DL, MVT::i32)); 10042 10043 // Swap if needed. Depends on the condition code. 10044 if (Swap) 10045 std::swap(Op0, Op1); 10046 10047 // Subtract extended integers. 10048 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1); 10049 10050 // Move the sign bit to the least significant position and zero out the rest. 10051 // Now the least significant bit carries the result of original comparison. 10052 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode, 10053 DAG.getConstant(Size - 1, DL, MVT::i32)); 10054 auto Final = Shifted; 10055 10056 // Complement the result if needed. Based on the condition code. 10057 if (Complement) 10058 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted, 10059 DAG.getConstant(1, DL, MVT::i64)); 10060 10061 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final); 10062 } 10063 10064 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N, 10065 DAGCombinerInfo &DCI) const { 10066 10067 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); 10068 10069 SelectionDAG &DAG = DCI.DAG; 10070 SDLoc DL(N); 10071 10072 // Size of integers being compared has a critical role in the following 10073 // analysis, so we prefer to do this when all types are legal. 
10074 if (!DCI.isAfterLegalizeVectorOps()) 10075 return SDValue(); 10076 10077 // If all users of SETCC extend its value to a legal integer type 10078 // then we replace SETCC with a subtraction 10079 for (SDNode::use_iterator UI = N->use_begin(), 10080 UE = N->use_end(); UI != UE; ++UI) { 10081 if (UI->getOpcode() != ISD::ZERO_EXTEND) 10082 return SDValue(); 10083 } 10084 10085 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 10086 auto OpSize = N->getOperand(0).getValueSizeInBits(); 10087 10088 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits(); 10089 10090 if (OpSize < Size) { 10091 switch (CC) { 10092 default: break; 10093 case ISD::SETULT: 10094 return generateEquivalentSub(N, Size, false, false, DL, DAG); 10095 case ISD::SETULE: 10096 return generateEquivalentSub(N, Size, true, true, DL, DAG); 10097 case ISD::SETUGT: 10098 return generateEquivalentSub(N, Size, false, true, DL, DAG); 10099 case ISD::SETUGE: 10100 return generateEquivalentSub(N, Size, true, false, DL, DAG); 10101 } 10102 } 10103 10104 return SDValue(); 10105 } 10106 10107 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 10108 DAGCombinerInfo &DCI) const { 10109 SelectionDAG &DAG = DCI.DAG; 10110 SDLoc dl(N); 10111 10112 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); 10113 // If we're tracking CR bits, we need to be careful that we don't have: 10114 // trunc(binary-ops(zext(x), zext(y))) 10115 // or 10116 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 10117 // such that we're unnecessarily moving things into GPRs when it would be 10118 // better to keep them in CR bits. 10119 10120 // Note that trunc here can be an actual i1 trunc, or can be the effective 10121 // truncation that comes from a setcc or select_cc. 
// A real TRUNCATE is only of interest when it produces an i1.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  // The first operand (the value being truncated or compared) must be an i32
  // or an i64.
  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't matter to the result.
    // The condition code is operand 2 of a SETCC and operand 4 of a
    // SELECT_CC.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed comparison: every bit of each operand must be a copy of the
      // sign bit.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned comparison: all bits above bit 0 must be known zero in both
      // operands. If not, a SETCC may still be convertible to a subtraction;
      // a SELECT_CC is left alone.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      APInt Op1Zero, Op1One;
      APInt Op2Zero, Op2One;
      DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
      DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Zero.clearBit(0); Op1One.clearBit(0);
      Op2Zero.clearBit(0); Op2One.clearBit(0);

      if (Op1Zero != Op2Zero || Op1One != Op2One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // For comparisons, the second operand must satisfy the same restriction.
  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // Inputs  - leaves of the cluster: extensions from i1, or constants.
  // BinOps  - worklist of interior nodes still to be visited.
  // PromOps - every interior node visited; these get rebuilt as i1.
  // Visited - nodes already taken off the BinOps worklist.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Classify the operands of N itself: both operands for a comparison, only
  // the first for a TRUNCATE.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      // For SELECT_CC only operands 2 and 3 are examined.
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      // A user that is neither N nor a visited cluster node means the input
      // escapes the cluster.
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  // Apply the same containment check to every interior node.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Hold each to-be-promoted value in a HandleSDNode and re-read it through
  // the handle on every iteration below.
  // NOTE(review): presumably this keeps the values current across the
  // replacements performed in the loop - confirm against HandleSDNode's
  // documentation.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      // An interior truncation/extension is replaced by its (i1) operand; a
      // constant operand is first truncated to i1.
      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first of the two operands that get promoted:
    // 0 for the binary operators, 1 for SELECT, 2 for SELECT_CC.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    // Rebuild the node with an i1 result and redirect all users to it.
    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}

SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  // Only extensions producing an i32 or i64 are handled.
  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  // The source must be an i1 (when CR bits are in use) or an i32 (on PPC64).
  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  // The extended value must itself be a logical operation or a select.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  // Inputs  - leaves of the cluster: truncations or constants.
  // BinOps  - worklist of interior nodes, seeded with the extended value.
  // PromOps - every interior node visited; these get rebuilt in the wide type.
  // Visited - nodes already taken off the BinOps worklist.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
// Drain the BinOps worklist, classifying each operand of each popped node.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      // For SELECT_CC only operands 2 and 3 are examined.
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  // Index 0 records operand 0 and index 1 records operand 1 of the user; the
  // mapped EVT is that operand's original value type.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      // A user that is neither N nor a visited cluster node means the input
      // escapes the cluster.
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  // Apply the same containment check and bookkeeping to every interior node.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  // PromBits is the width of the value being extended. ReallyNeedsExt is set
  // when some input's bits above PromBits are not already known to match the
  // requested extension.
  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      // Zero extension needs the top OpBits-PromBits bits known zero; sign
      // extension needs at least OpBits-(PromBits-1) sign bits.
      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    // InSrc is the value that was being truncated. It is used directly when
    // the first type check below succeeds, and otherwise resized to N's
    // result type with the extension kind that matches N.
    // NOTE(review): the check compares the truncation's own result type
    // (Inputs[i]) with N's result type, not InSrc's type - confirm this is
    // the intended comparison.
    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Hold each to-be-promoted value in a HandleSDNode and re-read it through
  // the handle on every iteration below.
  // NOTE(review): presumably this keeps the values current across the
  // replacements performed in the loop - confirm against HandleSDNode's
  // documentation.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first of the two operands that get promoted:
    // 0 for the binary operators, 1 for SELECT, 2 for SELECT_CC.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    // Rebuild the node with N's result type and redirect all users to it.
    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  // Sign extend from PromBits by shifting left and then arithmetic-shifting
  // right by (result width - PromBits).
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}

/// \brief Reduces the number of fp-to-int conversion when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
/// single floating to integer conversion of the vector.
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
10679 /// becomes (fptosi (build_vector ($A, $B, ...))) 10680 SDValue PPCTargetLowering:: 10681 combineElementTruncationToVectorTruncation(SDNode *N, 10682 DAGCombinerInfo &DCI) const { 10683 assert(N->getOpcode() == ISD::BUILD_VECTOR && 10684 "Should be called with a BUILD_VECTOR node"); 10685 10686 SelectionDAG &DAG = DCI.DAG; 10687 SDLoc dl(N); 10688 10689 SDValue FirstInput = N->getOperand(0); 10690 assert(FirstInput.getOpcode() == PPCISD::MFVSR && 10691 "The input operand must be an fp-to-int conversion."); 10692 10693 // This combine happens after legalization so the fp_to_[su]i nodes are 10694 // already converted to PPCSISD nodes. 10695 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode(); 10696 if (FirstConversion == PPCISD::FCTIDZ || 10697 FirstConversion == PPCISD::FCTIDUZ || 10698 FirstConversion == PPCISD::FCTIWZ || 10699 FirstConversion == PPCISD::FCTIWUZ) { 10700 bool IsSplat = true; 10701 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ || 10702 FirstConversion == PPCISD::FCTIWUZ; 10703 EVT SrcVT = FirstInput.getOperand(0).getValueType(); 10704 SmallVector<SDValue, 4> Ops; 10705 EVT TargetVT = N->getValueType(0); 10706 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { 10707 if (N->getOperand(i).getOpcode() != PPCISD::MFVSR) 10708 return SDValue(); 10709 unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode(); 10710 if (NextConversion != FirstConversion) 10711 return SDValue(); 10712 if (N->getOperand(i) != FirstInput) 10713 IsSplat = false; 10714 } 10715 10716 // If this is a splat, we leave it as-is since there will be only a single 10717 // fp-to-int conversion followed by a splat of the integer. This is better 10718 // for 32-bit and smaller ints and neutral for 64-bit ints. 
10719 if (IsSplat) 10720 return SDValue(); 10721 10722 // Now that we know we have the right type of node, get its operands 10723 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { 10724 SDValue In = N->getOperand(i).getOperand(0); 10725 // For 32-bit values, we need to add an FP_ROUND node. 10726 if (Is32Bit) { 10727 if (In.isUndef()) 10728 Ops.push_back(DAG.getUNDEF(SrcVT)); 10729 else { 10730 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl, 10731 MVT::f32, In.getOperand(0), 10732 DAG.getIntPtrConstant(1, dl)); 10733 Ops.push_back(Trunc); 10734 } 10735 } else 10736 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0)); 10737 } 10738 10739 unsigned Opcode; 10740 if (FirstConversion == PPCISD::FCTIDZ || 10741 FirstConversion == PPCISD::FCTIWZ) 10742 Opcode = ISD::FP_TO_SINT; 10743 else 10744 Opcode = ISD::FP_TO_UINT; 10745 10746 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32; 10747 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops); 10748 return DAG.getNode(Opcode, dl, TargetVT, BV); 10749 } 10750 return SDValue(); 10751 } 10752 10753 /// \brief Reduce the number of loads when building a vector. 10754 /// 10755 /// Building a vector out of multiple loads can be converted to a load 10756 /// of the vector type if the loads are consecutive. If the loads are 10757 /// consecutive but in descending order, a shuffle is added at the end 10758 /// to reorder the vector. 
10759 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { 10760 assert(N->getOpcode() == ISD::BUILD_VECTOR && 10761 "Should be called with a BUILD_VECTOR node"); 10762 10763 SDLoc dl(N); 10764 bool InputsAreConsecutiveLoads = true; 10765 bool InputsAreReverseConsecutive = true; 10766 unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8; 10767 SDValue FirstInput = N->getOperand(0); 10768 bool IsRoundOfExtLoad = false; 10769 10770 if (FirstInput.getOpcode() == ISD::FP_ROUND && 10771 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) { 10772 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0)); 10773 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD; 10774 } 10775 // Not a build vector of (possibly fp_rounded) loads. 10776 if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) 10777 return SDValue(); 10778 10779 for (int i = 1, e = N->getNumOperands(); i < e; ++i) { 10780 // If any inputs are fp_round(extload), they all must be. 10781 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND) 10782 return SDValue(); 10783 10784 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) : 10785 N->getOperand(i); 10786 if (NextInput.getOpcode() != ISD::LOAD) 10787 return SDValue(); 10788 10789 SDValue PreviousInput = 10790 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1); 10791 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput); 10792 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput); 10793 10794 // If any inputs are fp_round(extload), they all must be. 10795 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD) 10796 return SDValue(); 10797 10798 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG)) 10799 InputsAreConsecutiveLoads = false; 10800 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG)) 10801 InputsAreReverseConsecutive = false; 10802 10803 // Exit early if the loads are neither consecutive nor reverse consecutive. 
10804 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive) 10805 return SDValue(); 10806 } 10807 10808 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && 10809 "The loads cannot be both consecutive and reverse consecutive."); 10810 10811 SDValue FirstLoadOp = 10812 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput; 10813 SDValue LastLoadOp = 10814 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) : 10815 N->getOperand(N->getNumOperands()-1); 10816 10817 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp); 10818 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp); 10819 if (InputsAreConsecutiveLoads) { 10820 assert(LD1 && "Input needs to be a LoadSDNode."); 10821 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(), 10822 LD1->getBasePtr(), LD1->getPointerInfo(), 10823 LD1->getAlignment()); 10824 } 10825 if (InputsAreReverseConsecutive) { 10826 assert(LDL && "Input needs to be a LoadSDNode."); 10827 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), 10828 LDL->getBasePtr(), LDL->getPointerInfo(), 10829 LDL->getAlignment()); 10830 SmallVector<int, 16> Ops; 10831 for (int i = N->getNumOperands() - 1; i >= 0; i--) 10832 Ops.push_back(i); 10833 10834 return DAG.getVectorShuffle(N->getValueType(0), dl, Load, 10835 DAG.getUNDEF(N->getValueType(0)), Ops); 10836 } 10837 return SDValue(); 10838 } 10839 10840 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, 10841 DAGCombinerInfo &DCI) const { 10842 assert(N->getOpcode() == ISD::BUILD_VECTOR && 10843 "Should be called with a BUILD_VECTOR node"); 10844 10845 SelectionDAG &DAG = DCI.DAG; 10846 SDLoc dl(N); 10847 10848 if (!Subtarget.hasVSX()) 10849 return SDValue(); 10850 10851 // The target independent DAG combiner will leave a build_vector of 10852 // float-to-int conversions intact. We can generate MUCH better code for 10853 // a float-to-int conversion of a vector of floats. 
10854 SDValue FirstInput = N->getOperand(0); 10855 if (FirstInput.getOpcode() == PPCISD::MFVSR) { 10856 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI); 10857 if (Reduced) 10858 return Reduced; 10859 } 10860 10861 // If we're building a vector out of consecutive loads, just load that 10862 // vector type. 10863 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG); 10864 if (Reduced) 10865 return Reduced; 10866 10867 if (N->getValueType(0) != MVT::v2f64) 10868 return SDValue(); 10869 10870 // Looking for: 10871 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) 10872 if (FirstInput.getOpcode() != ISD::SINT_TO_FP && 10873 FirstInput.getOpcode() != ISD::UINT_TO_FP) 10874 return SDValue(); 10875 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && 10876 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) 10877 return SDValue(); 10878 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode()) 10879 return SDValue(); 10880 10881 SDValue Ext1 = FirstInput.getOperand(0); 10882 SDValue Ext2 = N->getOperand(1).getOperand(0); 10883 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 10884 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 10885 return SDValue(); 10886 10887 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1)); 10888 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); 10889 if (!Ext1Op || !Ext2Op) 10890 return SDValue(); 10891 if (Ext1.getValueType() != MVT::i32 || 10892 Ext2.getValueType() != MVT::i32) 10893 if (Ext1.getOperand(0) != Ext2.getOperand(0)) 10894 return SDValue(); 10895 10896 int FirstElem = Ext1Op->getZExtValue(); 10897 int SecondElem = Ext2Op->getZExtValue(); 10898 int SubvecIdx; 10899 if (FirstElem == 0 && SecondElem == 1) 10900 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; 10901 else if (FirstElem == 2 && SecondElem == 3) 10902 SubvecIdx = Subtarget.isLittleEndian() ? 
0 : 1; 10903 else 10904 return SDValue(); 10905 10906 SDValue SrcVec = Ext1.getOperand(0); 10907 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? 10908 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; 10909 return DAG.getNode(NodeType, dl, MVT::v2f64, 10910 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); 10911 } 10912 10913 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, 10914 DAGCombinerInfo &DCI) const { 10915 assert((N->getOpcode() == ISD::SINT_TO_FP || 10916 N->getOpcode() == ISD::UINT_TO_FP) && 10917 "Need an int -> FP conversion node here"); 10918 10919 if (useSoftFloat() || !Subtarget.has64BitSupport()) 10920 return SDValue(); 10921 10922 SelectionDAG &DAG = DCI.DAG; 10923 SDLoc dl(N); 10924 SDValue Op(N, 0); 10925 10926 SDValue FirstOperand(Op.getOperand(0)); 10927 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && 10928 (FirstOperand.getValueType() == MVT::i8 || 10929 FirstOperand.getValueType() == MVT::i16); 10930 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) { 10931 bool Signed = N->getOpcode() == ISD::SINT_TO_FP; 10932 bool DstDouble = Op.getValueType() == MVT::f64; 10933 unsigned ConvOp = Signed ? 10934 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) : 10935 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS); 10936 SDValue WidthConst = 10937 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2, 10938 dl, false); 10939 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode()); 10940 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; 10941 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, 10942 DAG.getVTList(MVT::f64, MVT::Other), 10943 Ops, MVT::i8, LDN->getMemOperand()); 10944 10945 // For signed conversion, we need to sign-extend the value in the VSR 10946 if (Signed) { 10947 SDValue ExtOps[] = { Ld, WidthConst }; 10948 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps); 10949 return DAG.getNode(ConvOp, dl, DstDouble ? 
MVT::f64 : MVT::f32, Ext); 10950 } else 10951 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld); 10952 } 10953 10954 // Don't handle ppc_fp128 here or i1 conversions. 10955 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 10956 return SDValue(); 10957 if (Op.getOperand(0).getValueType() == MVT::i1) 10958 return SDValue(); 10959 10960 // For i32 intermediate values, unfortunately, the conversion functions 10961 // leave the upper 32 bits of the value are undefined. Within the set of 10962 // scalar instructions, we have no method for zero- or sign-extending the 10963 // value. Thus, we cannot handle i32 intermediate values here. 10964 if (Op.getOperand(0).getValueType() == MVT::i32) 10965 return SDValue(); 10966 10967 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 10968 "UINT_TO_FP is supported only with FPCVT"); 10969 10970 // If we have FCFIDS, then use it when converting to single-precision. 10971 // Otherwise, convert to double-precision and then round. 10972 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10973 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 10974 : PPCISD::FCFIDS) 10975 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 10976 : PPCISD::FCFID); 10977 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10978 ? MVT::f32 10979 : MVT::f64; 10980 10981 // If we're converting from a float, to an int, and back to a float again, 10982 // then we don't need the store/load pair at all. 10983 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && 10984 Subtarget.hasFPCVT()) || 10985 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { 10986 SDValue Src = Op.getOperand(0).getOperand(0); 10987 if (Src.getValueType() == MVT::f32) { 10988 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 10989 DCI.AddToWorklist(Src.getNode()); 10990 } else if (Src.getValueType() != MVT::f64) { 10991 // Make sure that we don't pick up a ppc_fp128 source value. 
10992 return SDValue(); 10993 } 10994 10995 unsigned FCTOp = 10996 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 10997 PPCISD::FCTIDUZ; 10998 10999 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); 11000 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); 11001 11002 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { 11003 FP = DAG.getNode(ISD::FP_ROUND, dl, 11004 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 11005 DCI.AddToWorklist(FP.getNode()); 11006 } 11007 11008 return FP; 11009 } 11010 11011 return SDValue(); 11012 } 11013 11014 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for 11015 // builtins) into loads with swaps. 11016 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, 11017 DAGCombinerInfo &DCI) const { 11018 SelectionDAG &DAG = DCI.DAG; 11019 SDLoc dl(N); 11020 SDValue Chain; 11021 SDValue Base; 11022 MachineMemOperand *MMO; 11023 11024 switch (N->getOpcode()) { 11025 default: 11026 llvm_unreachable("Unexpected opcode for little endian VSX load"); 11027 case ISD::LOAD: { 11028 LoadSDNode *LD = cast<LoadSDNode>(N); 11029 Chain = LD->getChain(); 11030 Base = LD->getBasePtr(); 11031 MMO = LD->getMemOperand(); 11032 // If the MMO suggests this isn't a load of a full vector, leave 11033 // things alone. For a built-in, we have to make the change for 11034 // correctness, so if there is a size problem that will be a bug. 11035 if (MMO->getSize() < 16) 11036 return SDValue(); 11037 break; 11038 } 11039 case ISD::INTRINSIC_W_CHAIN: { 11040 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 11041 Chain = Intrin->getChain(); 11042 // Similarly to the store case below, Intrin->getBasePtr() doesn't get 11043 // us what we want. Get operand 2 instead. 
11044 Base = Intrin->getOperand(2); 11045 MMO = Intrin->getMemOperand(); 11046 break; 11047 } 11048 } 11049 11050 MVT VecTy = N->getValueType(0).getSimpleVT(); 11051 SDValue LoadOps[] = { Chain, Base }; 11052 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 11053 DAG.getVTList(MVT::v2f64, MVT::Other), 11054 LoadOps, MVT::v2f64, MMO); 11055 11056 DCI.AddToWorklist(Load.getNode()); 11057 Chain = Load.getValue(1); 11058 SDValue Swap = DAG.getNode( 11059 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); 11060 DCI.AddToWorklist(Swap.getNode()); 11061 11062 // Add a bitcast if the resulting load type doesn't match v2f64. 11063 if (VecTy != MVT::v2f64) { 11064 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); 11065 DCI.AddToWorklist(N.getNode()); 11066 // Package {bitcast value, swap's chain} to match Load's shape. 11067 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), 11068 N, Swap.getValue(1)); 11069 } 11070 11071 return Swap; 11072 } 11073 11074 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 11075 // builtins) into stores with swaps. 11076 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 11077 DAGCombinerInfo &DCI) const { 11078 SelectionDAG &DAG = DCI.DAG; 11079 SDLoc dl(N); 11080 SDValue Chain; 11081 SDValue Base; 11082 unsigned SrcOpnd; 11083 MachineMemOperand *MMO; 11084 11085 switch (N->getOpcode()) { 11086 default: 11087 llvm_unreachable("Unexpected opcode for little endian VSX store"); 11088 case ISD::STORE: { 11089 StoreSDNode *ST = cast<StoreSDNode>(N); 11090 Chain = ST->getChain(); 11091 Base = ST->getBasePtr(); 11092 MMO = ST->getMemOperand(); 11093 SrcOpnd = 1; 11094 // If the MMO suggests this isn't a store of a full vector, leave 11095 // things alone. For a built-in, we have to make the change for 11096 // correctness, so if there is a size problem that will be a bug. 
11097 if (MMO->getSize() < 16) 11098 return SDValue(); 11099 break; 11100 } 11101 case ISD::INTRINSIC_VOID: { 11102 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 11103 Chain = Intrin->getChain(); 11104 // Intrin->getBasePtr() oddly does not get what we want. 11105 Base = Intrin->getOperand(3); 11106 MMO = Intrin->getMemOperand(); 11107 SrcOpnd = 2; 11108 break; 11109 } 11110 } 11111 11112 SDValue Src = N->getOperand(SrcOpnd); 11113 MVT VecTy = Src.getValueType().getSimpleVT(); 11114 11115 // All stores are done as v2f64 and possible bit cast. 11116 if (VecTy != MVT::v2f64) { 11117 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); 11118 DCI.AddToWorklist(Src.getNode()); 11119 } 11120 11121 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 11122 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); 11123 DCI.AddToWorklist(Swap.getNode()); 11124 Chain = Swap.getValue(1); 11125 SDValue StoreOps[] = { Chain, Swap, Base }; 11126 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 11127 DAG.getVTList(MVT::Other), 11128 StoreOps, VecTy, MMO); 11129 DCI.AddToWorklist(Store.getNode()); 11130 return Store; 11131 } 11132 11133 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 11134 DAGCombinerInfo &DCI) const { 11135 SelectionDAG &DAG = DCI.DAG; 11136 SDLoc dl(N); 11137 switch (N->getOpcode()) { 11138 default: break; 11139 case PPCISD::SHL: 11140 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. 11141 return N->getOperand(0); 11142 break; 11143 case PPCISD::SRL: 11144 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. 11145 return N->getOperand(0); 11146 break; 11147 case PPCISD::SRA: 11148 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 11149 if (C->isNullValue() || // 0 >>s V -> 0. 11150 C->isAllOnesValue()) // -1 >>s V -> -1. 
11151 return N->getOperand(0); 11152 } 11153 break; 11154 case ISD::SIGN_EXTEND: 11155 case ISD::ZERO_EXTEND: 11156 case ISD::ANY_EXTEND: 11157 return DAGCombineExtBoolTrunc(N, DCI); 11158 case ISD::TRUNCATE: 11159 case ISD::SETCC: 11160 case ISD::SELECT_CC: 11161 return DAGCombineTruncBoolExt(N, DCI); 11162 case ISD::SINT_TO_FP: 11163 case ISD::UINT_TO_FP: 11164 return combineFPToIntToFP(N, DCI); 11165 case ISD::STORE: { 11166 EVT Op1VT = N->getOperand(1).getValueType(); 11167 bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) || 11168 (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16)); 11169 11170 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 11171 if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() && 11172 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 11173 ValidTypeForStoreFltAsInt && 11174 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 11175 SDValue Val = N->getOperand(1).getOperand(0); 11176 if (Val.getValueType() == MVT::f32) { 11177 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 11178 DCI.AddToWorklist(Val.getNode()); 11179 } 11180 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 11181 DCI.AddToWorklist(Val.getNode()); 11182 11183 if (Op1VT == MVT::i32) { 11184 SDValue Ops[] = { 11185 N->getOperand(0), Val, N->getOperand(2), 11186 DAG.getValueType(N->getOperand(1).getValueType()) 11187 }; 11188 11189 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 11190 DAG.getVTList(MVT::Other), Ops, 11191 cast<StoreSDNode>(N)->getMemoryVT(), 11192 cast<StoreSDNode>(N)->getMemOperand()); 11193 } else { 11194 unsigned WidthInBytes = 11195 N->getOperand(1).getValueType() == MVT::i8 ? 
1 : 2; 11196 SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false); 11197 11198 SDValue Ops[] = { 11199 N->getOperand(0), Val, N->getOperand(2), WidthConst, 11200 DAG.getValueType(N->getOperand(1).getValueType()) 11201 }; 11202 Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl, 11203 DAG.getVTList(MVT::Other), Ops, 11204 cast<StoreSDNode>(N)->getMemoryVT(), 11205 cast<StoreSDNode>(N)->getMemOperand()); 11206 } 11207 11208 DCI.AddToWorklist(Val.getNode()); 11209 return Val; 11210 } 11211 11212 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 11213 if (cast<StoreSDNode>(N)->isUnindexed() && 11214 N->getOperand(1).getOpcode() == ISD::BSWAP && 11215 N->getOperand(1).getNode()->hasOneUse() && 11216 (N->getOperand(1).getValueType() == MVT::i32 || 11217 N->getOperand(1).getValueType() == MVT::i16 || 11218 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 11219 N->getOperand(1).getValueType() == MVT::i64))) { 11220 SDValue BSwapOp = N->getOperand(1).getOperand(0); 11221 // Do an any-extend to 32-bits if this is a half-word input. 11222 if (BSwapOp.getValueType() == MVT::i16) 11223 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 11224 11225 SDValue Ops[] = { 11226 N->getOperand(0), BSwapOp, N->getOperand(2), 11227 DAG.getValueType(N->getOperand(1).getValueType()) 11228 }; 11229 return 11230 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 11231 Ops, cast<StoreSDNode>(N)->getMemoryVT(), 11232 cast<StoreSDNode>(N)->getMemOperand()); 11233 } 11234 11235 // For little endian, VSX stores require generating xxswapd/lxvd2x. 11236 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
11237 EVT VT = N->getOperand(1).getValueType(); 11238 if (VT.isSimple()) { 11239 MVT StoreVT = VT.getSimpleVT(); 11240 if (Subtarget.needsSwapsForVSXMemOps() && 11241 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || 11242 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) 11243 return expandVSXStoreForLE(N, DCI); 11244 } 11245 break; 11246 } 11247 case ISD::LOAD: { 11248 LoadSDNode *LD = cast<LoadSDNode>(N); 11249 EVT VT = LD->getValueType(0); 11250 11251 // For little endian, VSX loads require generating lxvd2x/xxswapd. 11252 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. 11253 if (VT.isSimple()) { 11254 MVT LoadVT = VT.getSimpleVT(); 11255 if (Subtarget.needsSwapsForVSXMemOps() && 11256 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || 11257 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) 11258 return expandVSXLoadForLE(N, DCI); 11259 } 11260 11261 // We sometimes end up with a 64-bit integer load, from which we extract 11262 // two single-precision floating-point numbers. This happens with 11263 // std::complex<float>, and other similar structures, because of the way we 11264 // canonicalize structure copies. However, if we lack direct moves, 11265 // then the final bitcasts from the extracted integer values to the 11266 // floating-point numbers turn into store/load pairs. Even with direct moves, 11267 // just loading the two floating-point numbers is likely better. 
11268 auto ReplaceTwoFloatLoad = [&]() { 11269 if (VT != MVT::i64) 11270 return false; 11271 11272 if (LD->getExtensionType() != ISD::NON_EXTLOAD || 11273 LD->isVolatile()) 11274 return false; 11275 11276 // We're looking for a sequence like this: 11277 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 11278 // t16: i64 = srl t13, Constant:i32<32> 11279 // t17: i32 = truncate t16 11280 // t18: f32 = bitcast t17 11281 // t19: i32 = truncate t13 11282 // t20: f32 = bitcast t19 11283 11284 if (!LD->hasNUsesOfValue(2, 0)) 11285 return false; 11286 11287 auto UI = LD->use_begin(); 11288 while (UI.getUse().getResNo() != 0) ++UI; 11289 SDNode *Trunc = *UI++; 11290 while (UI.getUse().getResNo() != 0) ++UI; 11291 SDNode *RightShift = *UI; 11292 if (Trunc->getOpcode() != ISD::TRUNCATE) 11293 std::swap(Trunc, RightShift); 11294 11295 if (Trunc->getOpcode() != ISD::TRUNCATE || 11296 Trunc->getValueType(0) != MVT::i32 || 11297 !Trunc->hasOneUse()) 11298 return false; 11299 if (RightShift->getOpcode() != ISD::SRL || 11300 !isa<ConstantSDNode>(RightShift->getOperand(1)) || 11301 RightShift->getConstantOperandVal(1) != 32 || 11302 !RightShift->hasOneUse()) 11303 return false; 11304 11305 SDNode *Trunc2 = *RightShift->use_begin(); 11306 if (Trunc2->getOpcode() != ISD::TRUNCATE || 11307 Trunc2->getValueType(0) != MVT::i32 || 11308 !Trunc2->hasOneUse()) 11309 return false; 11310 11311 SDNode *Bitcast = *Trunc->use_begin(); 11312 SDNode *Bitcast2 = *Trunc2->use_begin(); 11313 11314 if (Bitcast->getOpcode() != ISD::BITCAST || 11315 Bitcast->getValueType(0) != MVT::f32) 11316 return false; 11317 if (Bitcast2->getOpcode() != ISD::BITCAST || 11318 Bitcast2->getValueType(0) != MVT::f32) 11319 return false; 11320 11321 if (Subtarget.isLittleEndian()) 11322 std::swap(Bitcast, Bitcast2); 11323 11324 // Bitcast has the second float (in memory-layout order) and Bitcast2 11325 // has the first one. 
11326 11327 SDValue BasePtr = LD->getBasePtr(); 11328 if (LD->isIndexed()) { 11329 assert(LD->getAddressingMode() == ISD::PRE_INC && 11330 "Non-pre-inc AM on PPC?"); 11331 BasePtr = 11332 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 11333 LD->getOffset()); 11334 } 11335 11336 auto MMOFlags = 11337 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; 11338 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, 11339 LD->getPointerInfo(), LD->getAlignment(), 11340 MMOFlags, LD->getAAInfo()); 11341 SDValue AddPtr = 11342 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), 11343 BasePtr, DAG.getIntPtrConstant(4, dl)); 11344 SDValue FloatLoad2 = DAG.getLoad( 11345 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, 11346 LD->getPointerInfo().getWithOffset(4), 11347 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); 11348 11349 if (LD->isIndexed()) { 11350 // Note that DAGCombine should re-form any pre-increment load(s) from 11351 // what is produced here if that makes sense. 11352 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); 11353 } 11354 11355 DCI.CombineTo(Bitcast2, FloatLoad); 11356 DCI.CombineTo(Bitcast, FloatLoad2); 11357 11358 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), 11359 SDValue(FloatLoad2.getNode(), 1)); 11360 return true; 11361 }; 11362 11363 if (ReplaceTwoFloatLoad()) 11364 return SDValue(N, 0); 11365 11366 EVT MemVT = LD->getMemoryVT(); 11367 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); 11368 unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); 11369 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); 11370 unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); 11371 if (LD->isUnindexed() && VT.isVector() && 11372 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && 11373 // P8 and later hardware should just use LOAD. 
11374 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || 11375 VT == MVT::v4i32 || VT == MVT::v4f32)) || 11376 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && 11377 LD->getAlignment() >= ScalarABIAlignment)) && 11378 LD->getAlignment() < ABIAlignment) { 11379 // This is a type-legal unaligned Altivec or QPX load. 11380 SDValue Chain = LD->getChain(); 11381 SDValue Ptr = LD->getBasePtr(); 11382 bool isLittleEndian = Subtarget.isLittleEndian(); 11383 11384 // This implements the loading of unaligned vectors as described in 11385 // the venerable Apple Velocity Engine overview. Specifically: 11386 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 11387 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 11388 // 11389 // The general idea is to expand a sequence of one or more unaligned 11390 // loads into an alignment-based permutation-control instruction (lvsl 11391 // or lvsr), a series of regular vector loads (which always truncate 11392 // their input address to an aligned address), and a series of 11393 // permutations. The results of these permutations are the requested 11394 // loaded values. The trick is that the last "extra" load is not taken 11395 // from the address you might suspect (sizeof(vector) bytes after the 11396 // last requested load), but rather sizeof(vector) - 1 bytes after the 11397 // last requested vector. The point of this is to avoid a page fault if 11398 // the base address happened to be aligned. This works because if the 11399 // base address is aligned, then adding less than a full vector length 11400 // will cause the last vector in the sequence to be (re)loaded. 11401 // Otherwise, the next vector will be fetched as you might suspect was 11402 // necessary. 11403 11404 // We might be able to reuse the permutation generation from 11405 // a different base address offset from this one by an aligned amount. 
11406 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 11407 // optimization later. 11408 Intrinsic::ID Intr, IntrLD, IntrPerm; 11409 MVT PermCntlTy, PermTy, LDTy; 11410 if (Subtarget.hasAltivec()) { 11411 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : 11412 Intrinsic::ppc_altivec_lvsl; 11413 IntrLD = Intrinsic::ppc_altivec_lvx; 11414 IntrPerm = Intrinsic::ppc_altivec_vperm; 11415 PermCntlTy = MVT::v16i8; 11416 PermTy = MVT::v4i32; 11417 LDTy = MVT::v4i32; 11418 } else { 11419 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : 11420 Intrinsic::ppc_qpx_qvlpcls; 11421 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : 11422 Intrinsic::ppc_qpx_qvlfs; 11423 IntrPerm = Intrinsic::ppc_qpx_qvfperm; 11424 PermCntlTy = MVT::v4f64; 11425 PermTy = MVT::v4f64; 11426 LDTy = MemVT.getSimpleVT(); 11427 } 11428 11429 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); 11430 11431 // Create the new MMO for the new base load. It is like the original MMO, 11432 // but represents an area in memory almost twice the vector size centered 11433 // on the original address. If the address is unaligned, we might start 11434 // reading up to (sizeof(vector)-1) bytes below the address of the 11435 // original unaligned load. 11436 MachineFunction &MF = DAG.getMachineFunction(); 11437 MachineMemOperand *BaseMMO = 11438 MF.getMachineMemOperand(LD->getMemOperand(), 11439 -(long)MemVT.getStoreSize()+1, 11440 2*MemVT.getStoreSize()-1); 11441 11442 // Create the new base load. 
11443 SDValue LDXIntID = 11444 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); 11445 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; 11446 SDValue BaseLoad = 11447 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 11448 DAG.getVTList(PermTy, MVT::Other), 11449 BaseLoadOps, LDTy, BaseMMO); 11450 11451 // Note that the value of IncOffset (which is provided to the next 11452 // load's pointer info offset value, and thus used to calculate the 11453 // alignment), and the value of IncValue (which is actually used to 11454 // increment the pointer value) are different! This is because we 11455 // require the next load to appear to be aligned, even though it 11456 // is actually offset from the base pointer by a lesser amount. 11457 int IncOffset = VT.getSizeInBits() / 8; 11458 int IncValue = IncOffset; 11459 11460 // Walk (both up and down) the chain looking for another load at the real 11461 // (aligned) offset (the alignment of the other load does not matter in 11462 // this case). If found, then do not use the offset reduction trick, as 11463 // that will prevent the loads from being later combined (as they would 11464 // otherwise be duplicates). 
11465 if (!findConsecutiveLoad(LD, DAG)) 11466 --IncValue; 11467 11468 SDValue Increment = 11469 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); 11470 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 11471 11472 MachineMemOperand *ExtraMMO = 11473 MF.getMachineMemOperand(LD->getMemOperand(), 11474 1, 2*MemVT.getStoreSize()-1); 11475 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; 11476 SDValue ExtraLoad = 11477 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 11478 DAG.getVTList(PermTy, MVT::Other), 11479 ExtraLoadOps, LDTy, ExtraMMO); 11480 11481 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 11482 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 11483 11484 // Because vperm has a big-endian bias, we must reverse the order 11485 // of the input vectors and complement the permute control vector 11486 // when generating little endian code. We have already handled the 11487 // latter by using lvsr instead of lvsl, so just reverse BaseLoad 11488 // and ExtraLoad here. 11489 SDValue Perm; 11490 if (isLittleEndian) 11491 Perm = BuildIntrinsicOp(IntrPerm, 11492 ExtraLoad, BaseLoad, PermCntl, DAG, dl); 11493 else 11494 Perm = BuildIntrinsicOp(IntrPerm, 11495 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 11496 11497 if (VT != PermTy) 11498 Perm = Subtarget.hasAltivec() ? 11499 DAG.getNode(ISD::BITCAST, dl, VT, Perm) : 11500 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX 11501 DAG.getTargetConstant(1, dl, MVT::i64)); 11502 // second argument is 1 because this rounding 11503 // is always exact. 11504 11505 // The output of the permutation is our loaded result, the TokenFactor is 11506 // our new chain. 11507 DCI.CombineTo(N, Perm, TF); 11508 return SDValue(N, 0); 11509 } 11510 } 11511 break; 11512 case ISD::INTRINSIC_WO_CHAIN: { 11513 bool isLittleEndian = Subtarget.isLittleEndian(); 11514 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 11515 Intrinsic::ID Intr = (isLittleEndian ? 
Intrinsic::ppc_altivec_lvsr 11516 : Intrinsic::ppc_altivec_lvsl); 11517 if ((IID == Intr || 11518 IID == Intrinsic::ppc_qpx_qvlpcld || 11519 IID == Intrinsic::ppc_qpx_qvlpcls) && 11520 N->getOperand(1)->getOpcode() == ISD::ADD) { 11521 SDValue Add = N->getOperand(1); 11522 11523 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 11524 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; 11525 11526 if (DAG.MaskedValueIsZero(Add->getOperand(1), 11527 APInt::getAllOnesValue(Bits /* alignment */) 11528 .zext(Add.getScalarValueSizeInBits()))) { 11529 SDNode *BasePtr = Add->getOperand(0).getNode(); 11530 for (SDNode::use_iterator UI = BasePtr->use_begin(), 11531 UE = BasePtr->use_end(); 11532 UI != UE; ++UI) { 11533 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11534 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) { 11535 // We've found another LVSL/LVSR, and this address is an aligned 11536 // multiple of that one. The results will be the same, so use the 11537 // one we've just found instead. 
11538 11539 return SDValue(*UI, 0); 11540 } 11541 } 11542 } 11543 11544 if (isa<ConstantSDNode>(Add->getOperand(1))) { 11545 SDNode *BasePtr = Add->getOperand(0).getNode(); 11546 for (SDNode::use_iterator UI = BasePtr->use_begin(), 11547 UE = BasePtr->use_end(); UI != UE; ++UI) { 11548 if (UI->getOpcode() == ISD::ADD && 11549 isa<ConstantSDNode>(UI->getOperand(1)) && 11550 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - 11551 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) % 11552 (1ULL << Bits) == 0) { 11553 SDNode *OtherAdd = *UI; 11554 for (SDNode::use_iterator VI = OtherAdd->use_begin(), 11555 VE = OtherAdd->use_end(); VI != VE; ++VI) { 11556 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11557 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) { 11558 return SDValue(*VI, 0); 11559 } 11560 } 11561 } 11562 } 11563 } 11564 } 11565 } 11566 11567 break; 11568 case ISD::INTRINSIC_W_CHAIN: { 11569 // For little endian, VSX loads require generating lxvd2x/xxswapd. 11570 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. 11571 if (Subtarget.needsSwapsForVSXMemOps()) { 11572 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 11573 default: 11574 break; 11575 case Intrinsic::ppc_vsx_lxvw4x: 11576 case Intrinsic::ppc_vsx_lxvd2x: 11577 return expandVSXLoadForLE(N, DCI); 11578 } 11579 } 11580 break; 11581 } 11582 case ISD::INTRINSIC_VOID: { 11583 // For little endian, VSX stores require generating xxswapd/stxvd2x. 11584 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 11585 if (Subtarget.needsSwapsForVSXMemOps()) { 11586 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 11587 default: 11588 break; 11589 case Intrinsic::ppc_vsx_stxvw4x: 11590 case Intrinsic::ppc_vsx_stxvd2x: 11591 return expandVSXStoreForLE(N, DCI); 11592 } 11593 } 11594 break; 11595 } 11596 case ISD::BSWAP: 11597 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 
11598 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 11599 N->getOperand(0).hasOneUse() && 11600 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 11601 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 11602 N->getValueType(0) == MVT::i64))) { 11603 SDValue Load = N->getOperand(0); 11604 LoadSDNode *LD = cast<LoadSDNode>(Load); 11605 // Create the byte-swapping load. 11606 SDValue Ops[] = { 11607 LD->getChain(), // Chain 11608 LD->getBasePtr(), // Ptr 11609 DAG.getValueType(N->getValueType(0)) // VT 11610 }; 11611 SDValue BSLoad = 11612 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 11613 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 11614 MVT::i64 : MVT::i32, MVT::Other), 11615 Ops, LD->getMemoryVT(), LD->getMemOperand()); 11616 11617 // If this is an i16 load, insert the truncate. 11618 SDValue ResVal = BSLoad; 11619 if (N->getValueType(0) == MVT::i16) 11620 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 11621 11622 // First, combine the bswap away. This makes the value produced by the 11623 // load dead. 11624 DCI.CombineTo(N, ResVal); 11625 11626 // Next, combine the load away, we give it a bogus result value but a real 11627 // chain result. The result value is dead because the bswap is dead. 11628 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 11629 11630 // Return N so it doesn't get rechecked! 11631 return SDValue(N, 0); 11632 } 11633 11634 break; 11635 case PPCISD::VCMP: { 11636 // If a VCMPo node already exists with exactly the same operands as this 11637 // node, use its result instead of this node (VCMPo computes both a CR6 and 11638 // a normal output). 11639 // 11640 if (!N->getOperand(0).hasOneUse() && 11641 !N->getOperand(1).hasOneUse() && 11642 !N->getOperand(2).hasOneUse()) { 11643 11644 // Scan all of the users of the LHS, looking for VCMPo's that match. 
11645 SDNode *VCMPoNode = nullptr; 11646 11647 SDNode *LHSN = N->getOperand(0).getNode(); 11648 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 11649 UI != E; ++UI) 11650 if (UI->getOpcode() == PPCISD::VCMPo && 11651 UI->getOperand(1) == N->getOperand(1) && 11652 UI->getOperand(2) == N->getOperand(2) && 11653 UI->getOperand(0) == N->getOperand(0)) { 11654 VCMPoNode = *UI; 11655 break; 11656 } 11657 11658 // If there is no VCMPo node, or if the flag value has a single use, don't 11659 // transform this. 11660 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 11661 break; 11662 11663 // Look at the (necessarily single) use of the flag value. If it has a 11664 // chain, this transformation is more complex. Note that multiple things 11665 // could use the value result, which we should ignore. 11666 SDNode *FlagUser = nullptr; 11667 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 11668 FlagUser == nullptr; ++UI) { 11669 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 11670 SDNode *User = *UI; 11671 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 11672 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 11673 FlagUser = User; 11674 break; 11675 } 11676 } 11677 } 11678 11679 // If the user is a MFOCRF instruction, we know this is safe. 11680 // Otherwise we give up for right now. 11681 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 11682 return SDValue(VCMPoNode, 0); 11683 } 11684 break; 11685 } 11686 case ISD::BRCOND: { 11687 SDValue Cond = N->getOperand(1); 11688 SDValue Target = N->getOperand(2); 11689 11690 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 11691 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 11692 Intrinsic::ppc_is_decremented_ctr_nonzero) { 11693 11694 // We now need to make the intrinsic dead (it cannot be instruction 11695 // selected). 
11696 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 11697 assert(Cond.getNode()->hasOneUse() && 11698 "Counter decrement has more than one use"); 11699 11700 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 11701 N->getOperand(0), Target); 11702 } 11703 } 11704 break; 11705 case ISD::BR_CC: { 11706 // If this is a branch on an altivec predicate comparison, lower this so 11707 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 11708 // lowering is done pre-legalize, because the legalizer lowers the predicate 11709 // compare down to code that is difficult to reassemble. 11710 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 11711 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 11712 11713 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 11714 // value. If so, pass-through the AND to get to the intrinsic. 11715 if (LHS.getOpcode() == ISD::AND && 11716 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 11717 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 11718 Intrinsic::ppc_is_decremented_ctr_nonzero && 11719 isa<ConstantSDNode>(LHS.getOperand(1)) && 11720 !isNullConstant(LHS.getOperand(1))) 11721 LHS = LHS.getOperand(0); 11722 11723 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 11724 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 11725 Intrinsic::ppc_is_decremented_ctr_nonzero && 11726 isa<ConstantSDNode>(RHS)) { 11727 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 11728 "Counter decrement comparison is not EQ or NE"); 11729 11730 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 11731 bool isBDNZ = (CC == ISD::SETEQ && Val) || 11732 (CC == ISD::SETNE && !Val); 11733 11734 // We now need to make the intrinsic dead (it cannot be instruction 11735 // selected). 
11736 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 11737 assert(LHS.getNode()->hasOneUse() && 11738 "Counter decrement has more than one use"); 11739 11740 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 11741 N->getOperand(0), N->getOperand(4)); 11742 } 11743 11744 int CompareOpc; 11745 bool isDot; 11746 11747 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11748 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 11749 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { 11750 assert(isDot && "Can't compare against a vector result!"); 11751 11752 // If this is a comparison against something other than 0/1, then we know 11753 // that the condition is never/always true. 11754 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 11755 if (Val != 0 && Val != 1) { 11756 if (CC == ISD::SETEQ) // Cond never true, remove branch. 11757 return N->getOperand(0); 11758 // Always !=, turn it into an unconditional branch. 11759 return DAG.getNode(ISD::BR, dl, MVT::Other, 11760 N->getOperand(0), N->getOperand(4)); 11761 } 11762 11763 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 11764 11765 // Create the PPCISD altivec 'dot' comparison node. 11766 SDValue Ops[] = { 11767 LHS.getOperand(2), // LHS of compare 11768 LHS.getOperand(3), // RHS of compare 11769 DAG.getConstant(CompareOpc, dl, MVT::i32) 11770 }; 11771 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 11772 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 11773 11774 // Unpack the result based on how the target uses it. 11775 PPC::Predicate CompOpc; 11776 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 11777 default: // Can't happen, don't crash on invalid number though. 11778 case 0: // Branch on the value of the EQ bit of CR6. 11779 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 11780 break; 11781 case 1: // Branch on the inverted value of the EQ bit of CR6. 
11782 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 11783 break; 11784 case 2: // Branch on the value of the LT bit of CR6. 11785 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 11786 break; 11787 case 3: // Branch on the inverted value of the LT bit of CR6. 11788 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; 11789 break; 11790 } 11791 11792 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 11793 DAG.getConstant(CompOpc, dl, MVT::i32), 11794 DAG.getRegister(PPC::CR6, MVT::i32), 11795 N->getOperand(4), CompNode.getValue(1)); 11796 } 11797 break; 11798 } 11799 case ISD::BUILD_VECTOR: 11800 return DAGCombineBuildVector(N, DCI); 11801 } 11802 11803 return SDValue(); 11804 } 11805 11806 SDValue 11807 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 11808 SelectionDAG &DAG, 11809 std::vector<SDNode *> *Created) const { 11810 // fold (sdiv X, pow2) 11811 EVT VT = N->getValueType(0); 11812 if (VT == MVT::i64 && !Subtarget.isPPC64()) 11813 return SDValue(); 11814 if ((VT != MVT::i32 && VT != MVT::i64) || 11815 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) 11816 return SDValue(); 11817 11818 SDLoc DL(N); 11819 SDValue N0 = N->getOperand(0); 11820 11821 bool IsNegPow2 = (-Divisor).isPowerOf2(); 11822 unsigned Lg2 = (IsNegPow2 ? 
-Divisor : Divisor).countTrailingZeros(); 11823 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); 11824 11825 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); 11826 if (Created) 11827 Created->push_back(Op.getNode()); 11828 11829 if (IsNegPow2) { 11830 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); 11831 if (Created) 11832 Created->push_back(Op.getNode()); 11833 } 11834 11835 return Op; 11836 } 11837 11838 //===----------------------------------------------------------------------===// 11839 // Inline Assembly Support 11840 //===----------------------------------------------------------------------===// 11841 11842 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 11843 APInt &KnownZero, 11844 APInt &KnownOne, 11845 const SelectionDAG &DAG, 11846 unsigned Depth) const { 11847 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 11848 switch (Op.getOpcode()) { 11849 default: break; 11850 case PPCISD::LBRX: { 11851 // lhbrx is known to have the top bits cleared out. 
11852 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 11853 KnownZero = 0xFFFF0000; 11854 break; 11855 } 11856 case ISD::INTRINSIC_WO_CHAIN: { 11857 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 11858 default: break; 11859 case Intrinsic::ppc_altivec_vcmpbfp_p: 11860 case Intrinsic::ppc_altivec_vcmpeqfp_p: 11861 case Intrinsic::ppc_altivec_vcmpequb_p: 11862 case Intrinsic::ppc_altivec_vcmpequh_p: 11863 case Intrinsic::ppc_altivec_vcmpequw_p: 11864 case Intrinsic::ppc_altivec_vcmpequd_p: 11865 case Intrinsic::ppc_altivec_vcmpgefp_p: 11866 case Intrinsic::ppc_altivec_vcmpgtfp_p: 11867 case Intrinsic::ppc_altivec_vcmpgtsb_p: 11868 case Intrinsic::ppc_altivec_vcmpgtsh_p: 11869 case Intrinsic::ppc_altivec_vcmpgtsw_p: 11870 case Intrinsic::ppc_altivec_vcmpgtsd_p: 11871 case Intrinsic::ppc_altivec_vcmpgtub_p: 11872 case Intrinsic::ppc_altivec_vcmpgtuh_p: 11873 case Intrinsic::ppc_altivec_vcmpgtuw_p: 11874 case Intrinsic::ppc_altivec_vcmpgtud_p: 11875 KnownZero = ~1U; // All bits but the low one are known to be zero. 11876 break; 11877 } 11878 } 11879 } 11880 } 11881 11882 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { 11883 switch (Subtarget.getDarwinDirective()) { 11884 default: break; 11885 case PPC::DIR_970: 11886 case PPC::DIR_PWR4: 11887 case PPC::DIR_PWR5: 11888 case PPC::DIR_PWR5X: 11889 case PPC::DIR_PWR6: 11890 case PPC::DIR_PWR6X: 11891 case PPC::DIR_PWR7: 11892 case PPC::DIR_PWR8: 11893 case PPC::DIR_PWR9: { 11894 if (!ML) 11895 break; 11896 11897 const PPCInstrInfo *TII = Subtarget.getInstrInfo(); 11898 11899 // For small loops (between 5 and 8 instructions), align to a 32-byte 11900 // boundary so that the entire loop fits in one instruction-cache line. 
11901 uint64_t LoopSize = 0; 11902 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) 11903 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { 11904 LoopSize += TII->getInstSizeInBytes(*J); 11905 if (LoopSize > 32) 11906 break; 11907 } 11908 11909 if (LoopSize > 16 && LoopSize <= 32) 11910 return 5; 11911 11912 break; 11913 } 11914 } 11915 11916 return TargetLowering::getPrefLoopAlignment(ML); 11917 } 11918 11919 /// getConstraintType - Given a constraint, return the type of 11920 /// constraint it is for this target. 11921 PPCTargetLowering::ConstraintType 11922 PPCTargetLowering::getConstraintType(StringRef Constraint) const { 11923 if (Constraint.size() == 1) { 11924 switch (Constraint[0]) { 11925 default: break; 11926 case 'b': 11927 case 'r': 11928 case 'f': 11929 case 'd': 11930 case 'v': 11931 case 'y': 11932 return C_RegisterClass; 11933 case 'Z': 11934 // FIXME: While Z does indicate a memory constraint, it specifically 11935 // indicates an r+r address (used in conjunction with the 'y' modifier 11936 // in the replacement string). Currently, we're forcing the base 11937 // register to be r0 in the asm printer (which is interpreted as zero) 11938 // and forming the complete address in the second register. This is 11939 // suboptimal. 11940 return C_Memory; 11941 } 11942 } else if (Constraint == "wc") { // individual CR bits. 11943 return C_RegisterClass; 11944 } else if (Constraint == "wa" || Constraint == "wd" || 11945 Constraint == "wf" || Constraint == "ws") { 11946 return C_RegisterClass; // VSX registers. 11947 } 11948 return TargetLowering::getConstraintType(Constraint); 11949 } 11950 11951 /// Examine constraint type and operand type and determine a weight value. 11952 /// This object must already have been set up with the operand type 11953 /// and the current alternative constraint selected. 
11954 TargetLowering::ConstraintWeight 11955 PPCTargetLowering::getSingleConstraintMatchWeight( 11956 AsmOperandInfo &info, const char *constraint) const { 11957 ConstraintWeight weight = CW_Invalid; 11958 Value *CallOperandVal = info.CallOperandVal; 11959 // If we don't have a value, we can't do a match, 11960 // but allow it at the lowest weight. 11961 if (!CallOperandVal) 11962 return CW_Default; 11963 Type *type = CallOperandVal->getType(); 11964 11965 // Look at the constraint type. 11966 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 11967 return CW_Register; // an individual CR bit. 11968 else if ((StringRef(constraint) == "wa" || 11969 StringRef(constraint) == "wd" || 11970 StringRef(constraint) == "wf") && 11971 type->isVectorTy()) 11972 return CW_Register; 11973 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 11974 return CW_Register; 11975 11976 switch (*constraint) { 11977 default: 11978 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 11979 break; 11980 case 'b': 11981 if (type->isIntegerTy()) 11982 weight = CW_Register; 11983 break; 11984 case 'f': 11985 if (type->isFloatTy()) 11986 weight = CW_Register; 11987 break; 11988 case 'd': 11989 if (type->isDoubleTy()) 11990 weight = CW_Register; 11991 break; 11992 case 'v': 11993 if (type->isVectorTy()) 11994 weight = CW_Register; 11995 break; 11996 case 'y': 11997 weight = CW_Register; 11998 break; 11999 case 'Z': 12000 weight = CW_Memory; 12001 break; 12002 } 12003 return weight; 12004 } 12005 12006 std::pair<unsigned, const TargetRegisterClass *> 12007 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 12008 StringRef Constraint, 12009 MVT VT) const { 12010 if (Constraint.size() == 1) { 12011 // GCC RS6000 Constraint Letters 12012 switch (Constraint[0]) { 12013 case 'b': // R1-R31 12014 if (VT == MVT::i64 && Subtarget.isPPC64()) 12015 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 12016 return std::make_pair(0U, 
&PPC::GPRC_NOR0RegClass); 12017 case 'r': // R0-R31 12018 if (VT == MVT::i64 && Subtarget.isPPC64()) 12019 return std::make_pair(0U, &PPC::G8RCRegClass); 12020 return std::make_pair(0U, &PPC::GPRCRegClass); 12021 // 'd' and 'f' constraints are both defined to be "the floating point 12022 // registers", where one is for 32-bit and the other for 64-bit. We don't 12023 // really care overly much here so just give them all the same reg classes. 12024 case 'd': 12025 case 'f': 12026 if (VT == MVT::f32 || VT == MVT::i32) 12027 return std::make_pair(0U, &PPC::F4RCRegClass); 12028 if (VT == MVT::f64 || VT == MVT::i64) 12029 return std::make_pair(0U, &PPC::F8RCRegClass); 12030 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 12031 return std::make_pair(0U, &PPC::QFRCRegClass); 12032 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 12033 return std::make_pair(0U, &PPC::QSRCRegClass); 12034 break; 12035 case 'v': 12036 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 12037 return std::make_pair(0U, &PPC::QFRCRegClass); 12038 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 12039 return std::make_pair(0U, &PPC::QSRCRegClass); 12040 if (Subtarget.hasAltivec()) 12041 return std::make_pair(0U, &PPC::VRRCRegClass); 12042 case 'y': // crrc 12043 return std::make_pair(0U, &PPC::CRRCRegClass); 12044 } 12045 } else if (Constraint == "wc" && Subtarget.useCRBits()) { 12046 // An individual CR bit. 
12047 return std::make_pair(0U, &PPC::CRBITRCRegClass); 12048 } else if ((Constraint == "wa" || Constraint == "wd" || 12049 Constraint == "wf") && Subtarget.hasVSX()) { 12050 return std::make_pair(0U, &PPC::VSRCRegClass); 12051 } else if (Constraint == "ws" && Subtarget.hasVSX()) { 12052 if (VT == MVT::f32 && Subtarget.hasP8Vector()) 12053 return std::make_pair(0U, &PPC::VSSRCRegClass); 12054 else 12055 return std::make_pair(0U, &PPC::VSFRCRegClass); 12056 } 12057 12058 std::pair<unsigned, const TargetRegisterClass *> R = 12059 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 12060 12061 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 12062 // (which we call X[0-9]+). If a 64-bit value has been requested, and a 12063 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 12064 // register. 12065 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 12066 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 12067 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && 12068 PPC::GPRCRegClass.contains(R.first)) 12069 return std::make_pair(TRI->getMatchingSuperReg(R.first, 12070 PPC::sub_32, &PPC::G8RCRegClass), 12071 &PPC::G8RCRegClass); 12072 12073 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 12074 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { 12075 R.first = PPC::CR0; 12076 R.second = &PPC::CRRCRegClass; 12077 } 12078 12079 return R; 12080 } 12081 12082 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 12083 /// vector. If it is invalid, don't add anything to Ops. 12084 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 12085 std::string &Constraint, 12086 std::vector<SDValue>&Ops, 12087 SelectionDAG &DAG) const { 12088 SDValue Result; 12089 12090 // Only support length 1 constraints. 
12091 if (Constraint.length() > 1) return; 12092 12093 char Letter = Constraint[0]; 12094 switch (Letter) { 12095 default: break; 12096 case 'I': 12097 case 'J': 12098 case 'K': 12099 case 'L': 12100 case 'M': 12101 case 'N': 12102 case 'O': 12103 case 'P': { 12104 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 12105 if (!CST) return; // Must be an immediate to match. 12106 SDLoc dl(Op); 12107 int64_t Value = CST->getSExtValue(); 12108 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative 12109 // numbers are printed as such. 12110 switch (Letter) { 12111 default: llvm_unreachable("Unknown constraint letter!"); 12112 case 'I': // "I" is a signed 16-bit constant. 12113 if (isInt<16>(Value)) 12114 Result = DAG.getTargetConstant(Value, dl, TCVT); 12115 break; 12116 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 12117 if (isShiftedUInt<16, 16>(Value)) 12118 Result = DAG.getTargetConstant(Value, dl, TCVT); 12119 break; 12120 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 12121 if (isShiftedInt<16, 16>(Value)) 12122 Result = DAG.getTargetConstant(Value, dl, TCVT); 12123 break; 12124 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 12125 if (isUInt<16>(Value)) 12126 Result = DAG.getTargetConstant(Value, dl, TCVT); 12127 break; 12128 case 'M': // "M" is a constant that is greater than 31. 12129 if (Value > 31) 12130 Result = DAG.getTargetConstant(Value, dl, TCVT); 12131 break; 12132 case 'N': // "N" is a positive constant that is an exact power of two. 12133 if (Value > 0 && isPowerOf2_64(Value)) 12134 Result = DAG.getTargetConstant(Value, dl, TCVT); 12135 break; 12136 case 'O': // "O" is the constant zero. 12137 if (Value == 0) 12138 Result = DAG.getTargetConstant(Value, dl, TCVT); 12139 break; 12140 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 
12141 if (isInt<16>(-Value)) 12142 Result = DAG.getTargetConstant(Value, dl, TCVT); 12143 break; 12144 } 12145 break; 12146 } 12147 } 12148 12149 if (Result.getNode()) { 12150 Ops.push_back(Result); 12151 return; 12152 } 12153 12154 // Handle standard constraint letters. 12155 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 12156 } 12157 12158 // isLegalAddressingMode - Return true if the addressing mode represented 12159 // by AM is legal for this target, for a load/store of the specified type. 12160 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, 12161 const AddrMode &AM, Type *Ty, 12162 unsigned AS) const { 12163 // PPC does not allow r+i addressing modes for vectors! 12164 if (Ty->isVectorTy() && AM.BaseOffs != 0) 12165 return false; 12166 12167 // PPC allows a sign-extended 16-bit immediate field. 12168 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 12169 return false; 12170 12171 // No global is ever allowed as a base. 12172 if (AM.BaseGV) 12173 return false; 12174 12175 // PPC only support r+r, 12176 switch (AM.Scale) { 12177 case 0: // "r+i" or just "i", depending on HasBaseReg. 12178 break; 12179 case 1: 12180 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 12181 return false; 12182 // Otherwise we have r+r or r+i. 12183 break; 12184 case 2: 12185 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 12186 return false; 12187 // Allow 2*r as r+r. 12188 break; 12189 default: 12190 // No other scales are supported. 
12191 return false; 12192 } 12193 12194 return true; 12195 } 12196 12197 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 12198 SelectionDAG &DAG) const { 12199 MachineFunction &MF = DAG.getMachineFunction(); 12200 MachineFrameInfo &MFI = MF.getFrameInfo(); 12201 MFI.setReturnAddressIsTaken(true); 12202 12203 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 12204 return SDValue(); 12205 12206 SDLoc dl(Op); 12207 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 12208 12209 // Make sure the function does not optimize away the store of the RA to 12210 // the stack. 12211 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 12212 FuncInfo->setLRStoreRequired(); 12213 bool isPPC64 = Subtarget.isPPC64(); 12214 auto PtrVT = getPointerTy(MF.getDataLayout()); 12215 12216 if (Depth > 0) { 12217 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 12218 SDValue Offset = 12219 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, 12220 isPPC64 ? MVT::i64 : MVT::i32); 12221 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 12222 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), 12223 MachinePointerInfo()); 12224 } 12225 12226 // Just load the return address off the stack. 12227 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 12228 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, 12229 MachinePointerInfo()); 12230 } 12231 12232 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 12233 SelectionDAG &DAG) const { 12234 SDLoc dl(Op); 12235 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 12236 12237 MachineFunction &MF = DAG.getMachineFunction(); 12238 MachineFrameInfo &MFI = MF.getFrameInfo(); 12239 MFI.setFrameAddressIsTaken(true); 12240 12241 EVT PtrVT = getPointerTy(MF.getDataLayout()); 12242 bool isPPC64 = PtrVT == MVT::i64; 12243 12244 // Naked functions never have a frame pointer, and so we use r1. For all 12245 // other functions, this decision must be delayed until during PEI. 
12246 unsigned FrameReg; 12247 if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) 12248 FrameReg = isPPC64 ? PPC::X1 : PPC::R1; 12249 else 12250 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; 12251 12252 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 12253 PtrVT); 12254 while (Depth--) 12255 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 12256 FrameAddr, MachinePointerInfo()); 12257 return FrameAddr; 12258 } 12259 12260 // FIXME? Maybe this could be a TableGen attribute on some registers and 12261 // this table could be generated automatically from RegInfo. 12262 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, 12263 SelectionDAG &DAG) const { 12264 bool isPPC64 = Subtarget.isPPC64(); 12265 bool isDarwinABI = Subtarget.isDarwinABI(); 12266 12267 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || 12268 (!isPPC64 && VT != MVT::i32)) 12269 report_fatal_error("Invalid register global variable type"); 12270 12271 bool is64Bit = isPPC64 && VT == MVT::i64; 12272 unsigned Reg = StringSwitch<unsigned>(RegName) 12273 .Case("r1", is64Bit ? PPC::X1 : PPC::R1) 12274 .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) 12275 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : 12276 (is64Bit ? PPC::X13 : PPC::R13)) 12277 .Default(0); 12278 12279 if (Reg) 12280 return Reg; 12281 report_fatal_error("Invalid register name global variable"); 12282 } 12283 12284 bool 12285 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 12286 // The PowerPC target isn't yet aware of offsets. 
12287 return false; 12288 } 12289 12290 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 12291 const CallInst &I, 12292 unsigned Intrinsic) const { 12293 12294 switch (Intrinsic) { 12295 case Intrinsic::ppc_qpx_qvlfd: 12296 case Intrinsic::ppc_qpx_qvlfs: 12297 case Intrinsic::ppc_qpx_qvlfcd: 12298 case Intrinsic::ppc_qpx_qvlfcs: 12299 case Intrinsic::ppc_qpx_qvlfiwa: 12300 case Intrinsic::ppc_qpx_qvlfiwz: 12301 case Intrinsic::ppc_altivec_lvx: 12302 case Intrinsic::ppc_altivec_lvxl: 12303 case Intrinsic::ppc_altivec_lvebx: 12304 case Intrinsic::ppc_altivec_lvehx: 12305 case Intrinsic::ppc_altivec_lvewx: 12306 case Intrinsic::ppc_vsx_lxvd2x: 12307 case Intrinsic::ppc_vsx_lxvw4x: { 12308 EVT VT; 12309 switch (Intrinsic) { 12310 case Intrinsic::ppc_altivec_lvebx: 12311 VT = MVT::i8; 12312 break; 12313 case Intrinsic::ppc_altivec_lvehx: 12314 VT = MVT::i16; 12315 break; 12316 case Intrinsic::ppc_altivec_lvewx: 12317 VT = MVT::i32; 12318 break; 12319 case Intrinsic::ppc_vsx_lxvd2x: 12320 VT = MVT::v2f64; 12321 break; 12322 case Intrinsic::ppc_qpx_qvlfd: 12323 VT = MVT::v4f64; 12324 break; 12325 case Intrinsic::ppc_qpx_qvlfs: 12326 VT = MVT::v4f32; 12327 break; 12328 case Intrinsic::ppc_qpx_qvlfcd: 12329 VT = MVT::v2f64; 12330 break; 12331 case Intrinsic::ppc_qpx_qvlfcs: 12332 VT = MVT::v2f32; 12333 break; 12334 default: 12335 VT = MVT::v4i32; 12336 break; 12337 } 12338 12339 Info.opc = ISD::INTRINSIC_W_CHAIN; 12340 Info.memVT = VT; 12341 Info.ptrVal = I.getArgOperand(0); 12342 Info.offset = -VT.getStoreSize()+1; 12343 Info.size = 2*VT.getStoreSize()-1; 12344 Info.align = 1; 12345 Info.vol = false; 12346 Info.readMem = true; 12347 Info.writeMem = false; 12348 return true; 12349 } 12350 case Intrinsic::ppc_qpx_qvlfda: 12351 case Intrinsic::ppc_qpx_qvlfsa: 12352 case Intrinsic::ppc_qpx_qvlfcda: 12353 case Intrinsic::ppc_qpx_qvlfcsa: 12354 case Intrinsic::ppc_qpx_qvlfiwaa: 12355 case Intrinsic::ppc_qpx_qvlfiwza: { 12356 EVT VT; 12357 switch (Intrinsic) { 
12358 case Intrinsic::ppc_qpx_qvlfda: 12359 VT = MVT::v4f64; 12360 break; 12361 case Intrinsic::ppc_qpx_qvlfsa: 12362 VT = MVT::v4f32; 12363 break; 12364 case Intrinsic::ppc_qpx_qvlfcda: 12365 VT = MVT::v2f64; 12366 break; 12367 case Intrinsic::ppc_qpx_qvlfcsa: 12368 VT = MVT::v2f32; 12369 break; 12370 default: 12371 VT = MVT::v4i32; 12372 break; 12373 } 12374 12375 Info.opc = ISD::INTRINSIC_W_CHAIN; 12376 Info.memVT = VT; 12377 Info.ptrVal = I.getArgOperand(0); 12378 Info.offset = 0; 12379 Info.size = VT.getStoreSize(); 12380 Info.align = 1; 12381 Info.vol = false; 12382 Info.readMem = true; 12383 Info.writeMem = false; 12384 return true; 12385 } 12386 case Intrinsic::ppc_qpx_qvstfd: 12387 case Intrinsic::ppc_qpx_qvstfs: 12388 case Intrinsic::ppc_qpx_qvstfcd: 12389 case Intrinsic::ppc_qpx_qvstfcs: 12390 case Intrinsic::ppc_qpx_qvstfiw: 12391 case Intrinsic::ppc_altivec_stvx: 12392 case Intrinsic::ppc_altivec_stvxl: 12393 case Intrinsic::ppc_altivec_stvebx: 12394 case Intrinsic::ppc_altivec_stvehx: 12395 case Intrinsic::ppc_altivec_stvewx: 12396 case Intrinsic::ppc_vsx_stxvd2x: 12397 case Intrinsic::ppc_vsx_stxvw4x: { 12398 EVT VT; 12399 switch (Intrinsic) { 12400 case Intrinsic::ppc_altivec_stvebx: 12401 VT = MVT::i8; 12402 break; 12403 case Intrinsic::ppc_altivec_stvehx: 12404 VT = MVT::i16; 12405 break; 12406 case Intrinsic::ppc_altivec_stvewx: 12407 VT = MVT::i32; 12408 break; 12409 case Intrinsic::ppc_vsx_stxvd2x: 12410 VT = MVT::v2f64; 12411 break; 12412 case Intrinsic::ppc_qpx_qvstfd: 12413 VT = MVT::v4f64; 12414 break; 12415 case Intrinsic::ppc_qpx_qvstfs: 12416 VT = MVT::v4f32; 12417 break; 12418 case Intrinsic::ppc_qpx_qvstfcd: 12419 VT = MVT::v2f64; 12420 break; 12421 case Intrinsic::ppc_qpx_qvstfcs: 12422 VT = MVT::v2f32; 12423 break; 12424 default: 12425 VT = MVT::v4i32; 12426 break; 12427 } 12428 12429 Info.opc = ISD::INTRINSIC_VOID; 12430 Info.memVT = VT; 12431 Info.ptrVal = I.getArgOperand(1); 12432 Info.offset = -VT.getStoreSize()+1; 12433 
Info.size = 2*VT.getStoreSize()-1; 12434 Info.align = 1; 12435 Info.vol = false; 12436 Info.readMem = false; 12437 Info.writeMem = true; 12438 return true; 12439 } 12440 case Intrinsic::ppc_qpx_qvstfda: 12441 case Intrinsic::ppc_qpx_qvstfsa: 12442 case Intrinsic::ppc_qpx_qvstfcda: 12443 case Intrinsic::ppc_qpx_qvstfcsa: 12444 case Intrinsic::ppc_qpx_qvstfiwa: { 12445 EVT VT; 12446 switch (Intrinsic) { 12447 case Intrinsic::ppc_qpx_qvstfda: 12448 VT = MVT::v4f64; 12449 break; 12450 case Intrinsic::ppc_qpx_qvstfsa: 12451 VT = MVT::v4f32; 12452 break; 12453 case Intrinsic::ppc_qpx_qvstfcda: 12454 VT = MVT::v2f64; 12455 break; 12456 case Intrinsic::ppc_qpx_qvstfcsa: 12457 VT = MVT::v2f32; 12458 break; 12459 default: 12460 VT = MVT::v4i32; 12461 break; 12462 } 12463 12464 Info.opc = ISD::INTRINSIC_VOID; 12465 Info.memVT = VT; 12466 Info.ptrVal = I.getArgOperand(1); 12467 Info.offset = 0; 12468 Info.size = VT.getStoreSize(); 12469 Info.align = 1; 12470 Info.vol = false; 12471 Info.readMem = false; 12472 Info.writeMem = true; 12473 return true; 12474 } 12475 default: 12476 break; 12477 } 12478 12479 return false; 12480 } 12481 12482 /// getOptimalMemOpType - Returns the target specific optimal type for load 12483 /// and store operations as a result of memset, memcpy, and memmove 12484 /// lowering. If DstAlign is zero that means it's safe to destination 12485 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 12486 /// means there isn't a need to check it against alignment requirement, 12487 /// probably because the source does not need to be loaded. If 'IsMemset' is 12488 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 12489 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 12490 /// source is constant so it does not need to be loaded. 12491 /// It returns EVT::Other if the type should be determined using generic 12492 /// target-independent logic. 
12493 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, 12494 unsigned DstAlign, unsigned SrcAlign, 12495 bool IsMemset, bool ZeroMemset, 12496 bool MemcpyStrSrc, 12497 MachineFunction &MF) const { 12498 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { 12499 const Function *F = MF.getFunction(); 12500 // When expanding a memset, require at least two QPX instructions to cover 12501 // the cost of loading the value to be stored from the constant pool. 12502 if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && 12503 (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && 12504 !F->hasFnAttribute(Attribute::NoImplicitFloat)) { 12505 return MVT::v4f64; 12506 } 12507 12508 // We should use Altivec/VSX loads and stores when available. For unaligned 12509 // addresses, unaligned VSX loads are only fast starting with the P8. 12510 if (Subtarget.hasAltivec() && Size >= 16 && 12511 (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || 12512 ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) 12513 return MVT::v4i32; 12514 } 12515 12516 if (Subtarget.isPPC64()) { 12517 return MVT::i64; 12518 } 12519 12520 return MVT::i32; 12521 } 12522 12523 /// \brief Returns true if it is beneficial to convert a load of a constant 12524 /// to just the constant itself. 
12525 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 12526 Type *Ty) const { 12527 assert(Ty->isIntegerTy()); 12528 12529 unsigned BitSize = Ty->getPrimitiveSizeInBits(); 12530 return !(BitSize == 0 || BitSize > 64); 12531 } 12532 12533 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { 12534 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 12535 return false; 12536 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 12537 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 12538 return NumBits1 == 64 && NumBits2 == 32; 12539 } 12540 12541 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { 12542 if (!VT1.isInteger() || !VT2.isInteger()) 12543 return false; 12544 unsigned NumBits1 = VT1.getSizeInBits(); 12545 unsigned NumBits2 = VT2.getSizeInBits(); 12546 return NumBits1 == 64 && NumBits2 == 32; 12547 } 12548 12549 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 12550 // Generally speaking, zexts are not free, but they are free when they can be 12551 // folded with other operations. 12552 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) { 12553 EVT MemVT = LD->getMemoryVT(); 12554 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || 12555 (Subtarget.isPPC64() && MemVT == MVT::i32)) && 12556 (LD->getExtensionType() == ISD::NON_EXTLOAD || 12557 LD->getExtensionType() == ISD::ZEXTLOAD)) 12558 return true; 12559 } 12560 12561 // FIXME: Add other cases... 12562 // - 32-bit shifts with a zext to i64 12563 // - zext after ctlz, bswap, etc. 
12564 // - zext after and by a constant mask 12565 12566 return TargetLowering::isZExtFree(Val, VT2); 12567 } 12568 12569 bool PPCTargetLowering::isFPExtFree(EVT VT) const { 12570 assert(VT.isFloatingPoint()); 12571 return true; 12572 } 12573 12574 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 12575 return isInt<16>(Imm) || isUInt<16>(Imm); 12576 } 12577 12578 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { 12579 return isInt<16>(Imm) || isUInt<16>(Imm); 12580 } 12581 12582 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 12583 unsigned, 12584 unsigned, 12585 bool *Fast) const { 12586 if (DisablePPCUnaligned) 12587 return false; 12588 12589 // PowerPC supports unaligned memory access for simple non-vector types. 12590 // Although accessing unaligned addresses is not as efficient as accessing 12591 // aligned addresses, it is generally more efficient than manual expansion, 12592 // and generally only traps for software emulation when crossing page 12593 // boundaries. 
12594 12595 if (!VT.isSimple()) 12596 return false; 12597 12598 if (VT.getSimpleVT().isVector()) { 12599 if (Subtarget.hasVSX()) { 12600 if (VT != MVT::v2f64 && VT != MVT::v2i64 && 12601 VT != MVT::v4f32 && VT != MVT::v4i32) 12602 return false; 12603 } else { 12604 return false; 12605 } 12606 } 12607 12608 if (VT == MVT::ppcf128) 12609 return false; 12610 12611 if (Fast) 12612 *Fast = true; 12613 12614 return true; 12615 } 12616 12617 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 12618 VT = VT.getScalarType(); 12619 12620 if (!VT.isSimple()) 12621 return false; 12622 12623 switch (VT.getSimpleVT().SimpleTy) { 12624 case MVT::f32: 12625 case MVT::f64: 12626 return true; 12627 default: 12628 break; 12629 } 12630 12631 return false; 12632 } 12633 12634 const MCPhysReg * 12635 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { 12636 // LR is a callee-save register, but we must treat it as clobbered by any call 12637 // site. Hence we include LR in the scratch registers, which are in turn added 12638 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies 12639 // to CTR, which is used by any indirect call. 12640 static const MCPhysReg ScratchRegs[] = { 12641 PPC::X12, PPC::LR8, PPC::CTR8, 0 12642 }; 12643 12644 return ScratchRegs; 12645 } 12646 12647 unsigned PPCTargetLowering::getExceptionPointerRegister( 12648 const Constant *PersonalityFn) const { 12649 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; 12650 } 12651 12652 unsigned PPCTargetLowering::getExceptionSelectorRegister( 12653 const Constant *PersonalityFn) const { 12654 return Subtarget.isPPC64() ? 
PPC::X4 : PPC::R4; 12655 } 12656 12657 bool 12658 PPCTargetLowering::shouldExpandBuildVectorWithShuffles( 12659 EVT VT , unsigned DefinedValues) const { 12660 if (VT == MVT::v2i64) 12661 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves 12662 12663 if (Subtarget.hasVSX() || Subtarget.hasQPX()) 12664 return true; 12665 12666 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); 12667 } 12668 12669 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 12670 if (DisableILPPref || Subtarget.enableMachineScheduler()) 12671 return TargetLowering::getSchedulingPreference(N); 12672 12673 return Sched::ILP; 12674 } 12675 12676 // Create a fast isel object. 12677 FastISel * 12678 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, 12679 const TargetLibraryInfo *LibInfo) const { 12680 return PPC::createFastISel(FuncInfo, LibInfo); 12681 } 12682 12683 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { 12684 if (Subtarget.isDarwinABI()) return; 12685 if (!Subtarget.isPPC64()) return; 12686 12687 // Update IsSplitCSR in PPCFunctionInfo 12688 PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>(); 12689 PFI->setIsSplitCSR(true); 12690 } 12691 12692 void PPCTargetLowering::insertCopiesSplitCSR( 12693 MachineBasicBlock *Entry, 12694 const SmallVectorImpl<MachineBasicBlock *> &Exits) const { 12695 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 12696 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); 12697 if (!IStart) 12698 return; 12699 12700 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 12701 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); 12702 MachineBasicBlock::iterator MBBI = Entry->begin(); 12703 for (const MCPhysReg *I = IStart; *I; ++I) { 12704 const TargetRegisterClass *RC = nullptr; 12705 if (PPC::G8RCRegClass.contains(*I)) 12706 RC = &PPC::G8RCRegClass; 12707 else if 
(PPC::F8RCRegClass.contains(*I)) 12708 RC = &PPC::F8RCRegClass; 12709 else if (PPC::CRRCRegClass.contains(*I)) 12710 RC = &PPC::CRRCRegClass; 12711 else if (PPC::VRRCRegClass.contains(*I)) 12712 RC = &PPC::VRRCRegClass; 12713 else 12714 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 12715 12716 unsigned NewVR = MRI->createVirtualRegister(RC); 12717 // Create copy from CSR to a virtual register. 12718 // FIXME: this currently does not emit CFI pseudo-instructions, it works 12719 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be 12720 // nounwind. If we want to generalize this later, we may need to emit 12721 // CFI pseudo-instructions. 12722 assert(Entry->getParent()->getFunction()->hasFnAttribute( 12723 Attribute::NoUnwind) && 12724 "Function should be nounwind in insertCopiesSplitCSR!"); 12725 Entry->addLiveIn(*I); 12726 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) 12727 .addReg(*I); 12728 12729 // Insert the copy-back instructions right before the terminator 12730 for (auto *Exit : Exits) 12731 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), 12732 TII->get(TargetOpcode::COPY), *I) 12733 .addReg(NewVR); 12734 } 12735 } 12736 12737 // Override to enable LOAD_STACK_GUARD lowering on Linux. 12738 bool PPCTargetLowering::useLoadStackGuardNode() const { 12739 if (!Subtarget.isTargetLinux()) 12740 return TargetLowering::useLoadStackGuardNode(); 12741 return true; 12742 } 12743 12744 // Override to disable global variable loading on Linux. 
12745 void PPCTargetLowering::insertSSPDeclarations(Module &M) const { 12746 if (!Subtarget.isTargetLinux()) 12747 return TargetLowering::insertSSPDeclarations(M); 12748 } 12749 12750 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 12751 12752 if (!VT.isSimple() || !Subtarget.hasVSX()) 12753 return false; 12754 12755 switch(VT.getSimpleVT().SimpleTy) { 12756 default: 12757 // For FP types that are currently not supported by PPC backend, return 12758 // false. Examples: f16, f80. 12759 return false; 12760 case MVT::f32: 12761 case MVT::f64: 12762 case MVT::ppcf128: 12763 return Imm.isPosZero(); 12764 } 12765 } 12766