//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: Remove this once soft-float is supported.
static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8:4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
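  // For illustration: "pre-inc" here refers to the update-form memory
  // instructions, e.g. "lwzu r5, 4(r3)", which loads from r3+4 and writes the
  // incremented address back into r3 in the same instruction.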
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load / store of condition registers
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
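  // With those expanded as well, SREM/UREM are lowered by the generic
  // legalizer into a divide, multiply, and subtract sequence (roughly
  // "a - (a/b)*b") built on the divw/divd family of instructions.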
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget.hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32 , Expand);
  setOperationAction(ISD::ROTR, MVT::i64 , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
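      // e.g. an i8 va_arg is promoted to an i64 read of the whole chunk and
      // truncated afterwards; the MVT::Other case below falls back to the
      // default expansion.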
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
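  // (FPCVT here denotes the Power ISA 2.06 conversion instructions, e.g.
  // fctiwuz/fctiduz and fcfids/fcfidus, which cover the remaining
  // int<->fp direction/width combinations.)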
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND , VT, Promote);
      AddPromotedToType (ISD::AND , VT, MVT::v4i32);
      setOperationAction(ISD::OR , VT, Promote);
      AddPromotedToType (ISD::OR , VT, MVT::v4i32);
      setOperationAction(ISD::XOR , VT, Promote);
      AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD , VT, Promote);
      AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
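      // Everything not re-enabled further down is scalarized or otherwise
      // expanded by the legalizer (a few of these, e.g. MUL on v4i32/v8i16/
      // v16i8 and VECTOR_SHUFFLE on v16i8, are custom-lowered below).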
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND , MVT::v4i32, Legal);
    setOperationAction(ISD::OR , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
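      // As for v4f32 above, these condition codes are expanded by the
      // legalizer into combinations of the ordered compares that VSX does
      // provide.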
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      // VSX v2i64 only supports non-arithmetic operations.
      setOperationAction(ISD::ADD, MVT::v2i64, Expand);
      setOperationAction(ISD::SUB, MVT::v2i64, Expand);

      setOperationAction(ISD::SHL, MVT::v2i64, Expand);
      setOperationAction(ISD::SRA, MVT::v2i64, Expand);
      setOperationAction(ISD::SRL, MVT::v2i64, Expand);

      setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec())
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
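    // (Clearing the names keeps the legalizer from emitting calls to the
    // 128-bit shift helpers, which a 32-bit PowerPC runtime does not
    // provide.)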
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  setInsertFencesForAtomic(true);

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return nullptr;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LARX: return "PPCISD::LARX";
  case PPCISD::STCX: return "PPCISD::STCX";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
"PPCISD::ADDIS_DTPREL_HA"; 1014 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 1015 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; 1016 case PPCISD::SC: return "PPCISD::SC"; 1017 case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; 1018 case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; 1019 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; 1020 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; 1021 case PPCISD::QBFLT: return "PPCISD::QBFLT"; 1022 case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; 1023 } 1024 } 1025 1026 EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const { 1027 if (!VT.isVector()) 1028 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; 1029 1030 if (Subtarget.hasQPX()) 1031 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); 1032 1033 return VT.changeVectorElementTypeToInteger(); 1034 } 1035 1036 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { 1037 assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); 1038 return true; 1039 } 1040 1041 //===----------------------------------------------------------------------===// 1042 // Node matching predicates, for use by the tblgen matching code. 1043 //===----------------------------------------------------------------------===// 1044 1045 /// isFloatingPointZero - Return true if this is 0.0 or -0.0. 1046 static bool isFloatingPointZero(SDValue Op) { 1047 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 1048 return CFP->getValueAPF().isZero(); 1049 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 1050 // Maybe this has already been legalized into the constant pool? 1051 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 1052 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 1053 return CFP->getValueAPF().isZero(); 1054 } 1055 return false; 1056 } 1057 1058 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 1059 /// true if Op is undef or if it matches the specified value. 1060 static bool isConstantOrUndef(int Op, int Val) { 1061 return Op < 0 || Op == Val; 1062 } 1063 1064 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 1065 /// VPKUHUM instruction. 1066 /// The ShuffleKind distinguishes between big-endian operations with 1067 /// two different inputs (0), either-endian operations with two identical 1068 /// inputs (1), and little-endian operantion with two different inputs (2). 1069 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1070 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1071 SelectionDAG &DAG) { 1072 bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); 1073 if (ShuffleKind == 0) { 1074 if (IsLE) 1075 return false; 1076 for (unsigned i = 0; i != 16; ++i) 1077 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 1078 return false; 1079 } else if (ShuffleKind == 2) { 1080 if (!IsLE) 1081 return false; 1082 for (unsigned i = 0; i != 16; ++i) 1083 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) 1084 return false; 1085 } else if (ShuffleKind == 1) { 1086 unsigned j = IsLE ? 0 : 1; 1087 for (unsigned i = 0; i != 8; ++i) 1088 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || 1089 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) 1090 return false; 1091 } 1092 return true; 1093 } 1094 1095 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 1096 /// VPKUWUM instruction. 
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (ShuffleKind == 2 && isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
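  // e.g. splatting element 1 of a v4i32 (EltSize == 4) should give the byte
  // mask {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}.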
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getTarget().getDataLayout()->isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();


      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
1346 1347 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); 1348 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); 1349 } 1350 // Finally, check the least significant entry. 1351 if (LeadingZero) { 1352 if (!UniquedVals[Multiple-1].getNode()) 1353 return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef 1354 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 1355 if (Val < 16) 1356 return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) 1357 } 1358 if (LeadingOnes) { 1359 if (!UniquedVals[Multiple-1].getNode()) 1360 return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef 1361 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 1362 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 1363 return DAG.getTargetConstant(Val, MVT::i32); 1364 } 1365 1366 return SDValue(); 1367 } 1368 1369 // Check to see if this buildvec has a single non-undef value in its elements. 1370 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1371 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1372 if (!OpVal.getNode()) 1373 OpVal = N->getOperand(i); 1374 else if (OpVal != N->getOperand(i)) 1375 return SDValue(); 1376 } 1377 1378 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 1379 1380 unsigned ValSizeInBytes = EltSize; 1381 uint64_t Value = 0; 1382 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1383 Value = CN->getZExtValue(); 1384 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1385 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 1386 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 1387 } 1388 1389 // If the splat value is larger than the element value, then we can never do 1390 // this splat. The only case that we could fit the replicated bits into our 1391 // immediate field for would be zero, and we prefer to use vxor for it. 1392 if (ValSizeInBytes < ByteSize) return SDValue(); 1393 1394 // If the element value is larger than the splat value, cut it in half and 1395 // check to see if the two halves are equal. Continue doing this until we 1396 // get to ByteSize. This allows us to handle 0x01010101 as 0x01. 1397 while (ValSizeInBytes > ByteSize) { 1398 ValSizeInBytes >>= 1; 1399 1400 // If the top half equals the bottom half, we're still ok. 1401 if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != 1402 (Value & ((1 << (8*ValSizeInBytes))-1))) 1403 return SDValue(); 1404 } 1405 1406 // Properly sign extend the value. 1407 int MaskVal = SignExtend32(Value, ByteSize * 8); 1408 1409 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 1410 if (MaskVal == 0) return SDValue(); 1411 1412 // Finally, if this value fits in a 5 bit sext field, return it 1413 if (SignExtend32<5>(MaskVal) == MaskVal) 1414 return DAG.getTargetConstant(MaskVal, MVT::i32); 1415 return SDValue(); 1416 } 1417 1418 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift 1419 /// amount, otherwise return -1. 1420 int PPC::isQVALIGNIShuffleMask(SDNode *N) { 1421 EVT VT = N->getValueType(0); 1422 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) 1423 return -1; 1424 1425 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1426 1427 // Find the first non-undef value in the shuffle mask. 1428 unsigned i; 1429 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) 1430 /*search*/; 1431 1432 if (i == 4) return -1; // all undef. 
1433 1434 // Otherwise, check to see if the rest of the elements are consecutively 1435 // numbered from this value. 1436 unsigned ShiftAmt = SVOp->getMaskElt(i); 1437 if (ShiftAmt < i) return -1; 1438 ShiftAmt -= i; 1439 1440 // Check the rest of the elements to see if they are consecutive. 1441 for (++i; i != 4; ++i) 1442 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1443 return -1; 1444 1445 return ShiftAmt; 1446 } 1447 1448 //===----------------------------------------------------------------------===// 1449 // Addressing Mode Selection 1450 //===----------------------------------------------------------------------===// 1451 1452 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit 1453 /// or 64-bit immediate, and if the value can be accurately represented as a 1454 /// sign extension from a 16-bit value. If so, this returns true and the 1455 /// immediate. 1456 static bool isIntS16Immediate(SDNode *N, short &Imm) { 1457 if (!isa<ConstantSDNode>(N)) 1458 return false; 1459 1460 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 1461 if (N->getValueType(0) == MVT::i32) 1462 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 1463 else 1464 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 1465 } 1466 static bool isIntS16Immediate(SDValue Op, short &Imm) { 1467 return isIntS16Immediate(Op.getNode(), Imm); 1468 } 1469 1470 1471 /// SelectAddressRegReg - Given the specified addressed, check to see if it 1472 /// can be represented as an indexed [r+r] operation. Returns false if it 1473 /// can be more efficiently represented with [r+imm]. 1474 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 1475 SDValue &Index, 1476 SelectionDAG &DAG) const { 1477 short imm = 0; 1478 if (N.getOpcode() == ISD::ADD) { 1479 if (isIntS16Immediate(N.getOperand(1), imm)) 1480 return false; // r+i 1481 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 1482 return false; // r+i 1483 1484 Base = N.getOperand(0); 1485 Index = N.getOperand(1); 1486 return true; 1487 } else if (N.getOpcode() == ISD::OR) { 1488 if (isIntS16Immediate(N.getOperand(1), imm)) 1489 return false; // r+i can fold it if we can. 1490 1491 // If this is an or of disjoint bitfields, we can codegen this as an add 1492 // (for better address arithmetic) if the LHS and RHS of the OR are provably 1493 // disjoint. 1494 APInt LHSKnownZero, LHSKnownOne; 1495 APInt RHSKnownZero, RHSKnownOne; 1496 DAG.computeKnownBits(N.getOperand(0), 1497 LHSKnownZero, LHSKnownOne); 1498 1499 if (LHSKnownZero.getBoolValue()) { 1500 DAG.computeKnownBits(N.getOperand(1), 1501 RHSKnownZero, RHSKnownOne); 1502 // If all of the bits are known zero on the LHS or RHS, the add won't 1503 // carry. 1504 if (~(LHSKnownZero | RHSKnownZero) == 0) { 1505 Base = N.getOperand(0); 1506 Index = N.getOperand(1); 1507 return true; 1508 } 1509 } 1510 } 1511 1512 return false; 1513 } 1514 1515 // If we happen to be doing an i64 load or store into a stack slot that has 1516 // less than a 4-byte alignment, then the frame-index elimination may need to 1517 // use an indexed load or store instruction (because the offset may not be a 1518 // multiple of 4). The extra register needed to hold the offset comes from the 1519 // register scavenger, and it is possible that the scavenger will need to use 1520 // an emergency spill slot. As a result, we need to make sure that a spill slot 1521 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned 1522 // stack slot. 
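// (The DS-form ld/std can only encode displacements that are a multiple of 4;
// any other offset has to become an X-form ldx/stdx, and the register holding
// that offset is what the scavenger may have to provide.)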
1523 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { 1524 // FIXME: This does not handle the LWA case. 1525 if (VT != MVT::i64) 1526 return; 1527 1528 // NOTE: We'll exclude negative FIs here, which come from argument 1529 // lowering, because there are no known test cases triggering this problem 1530 // using packed structures (or similar). We can remove this exclusion if 1531 // we find such a test case. The reason why this is so test-case driven is 1532 // because this entire 'fixup' is only to prevent crashes (from the 1533 // register scavenger) on not-really-valid inputs. For example, if we have: 1534 // %a = alloca i1 1535 // %b = bitcast i1* %a to i64* 1536 // store i64* a, i64 b 1537 // then the store should really be marked as 'align 1', but is not. If it 1538 // were marked as 'align 1' then the indexed form would have been 1539 // instruction-selected initially, and the problem this 'fixup' is preventing 1540 // won't happen regardless. 1541 if (FrameIdx < 0) 1542 return; 1543 1544 MachineFunction &MF = DAG.getMachineFunction(); 1545 MachineFrameInfo *MFI = MF.getFrameInfo(); 1546 1547 unsigned Align = MFI->getObjectAlignment(FrameIdx); 1548 if (Align >= 4) 1549 return; 1550 1551 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1552 FuncInfo->setHasNonRISpills(); 1553 } 1554 1555 /// Returns true if the address N can be represented by a base register plus 1556 /// a signed 16-bit displacement [r+imm], and if it is not better 1557 /// represented as reg+reg. If Aligned is true, only accept displacements 1558 /// suitable for STD and friends, i.e. multiples of 4. 1559 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 1560 SDValue &Base, 1561 SelectionDAG &DAG, 1562 bool Aligned) const { 1563 // FIXME dl should come from parent load or store, not from address 1564 SDLoc dl(N); 1565 // If this can be more profitably realized as r+r, fail. 1566 if (SelectAddressRegReg(N, Disp, Base, DAG)) 1567 return false; 1568 1569 if (N.getOpcode() == ISD::ADD) { 1570 short imm = 0; 1571 if (isIntS16Immediate(N.getOperand(1), imm) && 1572 (!Aligned || (imm & 3) == 0)) { 1573 Disp = DAG.getTargetConstant(imm, N.getValueType()); 1574 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1575 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1576 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1577 } else { 1578 Base = N.getOperand(0); 1579 } 1580 return true; // [r+i] 1581 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 1582 // Match LOAD (ADD (X, Lo(G))). 1583 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 1584 && "Cannot handle constant offsets yet!"); 1585 Disp = N.getOperand(1).getOperand(0); // The global address. 1586 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 1587 Disp.getOpcode() == ISD::TargetGlobalTLSAddress || 1588 Disp.getOpcode() == ISD::TargetConstantPool || 1589 Disp.getOpcode() == ISD::TargetJumpTable); 1590 Base = N.getOperand(0); 1591 return true; // [&g+r] 1592 } 1593 } else if (N.getOpcode() == ISD::OR) { 1594 short imm = 0; 1595 if (isIntS16Immediate(N.getOperand(1), imm) && 1596 (!Aligned || (imm & 3) == 0)) { 1597 // If this is an or of disjoint bitfields, we can codegen this as an add 1598 // (for better address arithmetic) if the LHS and RHS of the OR are 1599 // provably disjoint. 
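      // For example, (or %ptr, 12) can be selected as [%ptr + 12] when the low
      // bits of %ptr are known to be zero, as they are after pointer alignment.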
1600 APInt LHSKnownZero, LHSKnownOne; 1601 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 1602 1603 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 1604 // If all of the bits are known zero on the LHS or RHS, the add won't 1605 // carry. 1606 if (FrameIndexSDNode *FI = 1607 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1608 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1609 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1610 } else { 1611 Base = N.getOperand(0); 1612 } 1613 Disp = DAG.getTargetConstant(imm, N.getValueType()); 1614 return true; 1615 } 1616 } 1617 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1618 // Loading from a constant address. 1619 1620 // If this address fits entirely in a 16-bit sext immediate field, codegen 1621 // this as "d, 0" 1622 short Imm; 1623 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { 1624 Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); 1625 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1626 CN->getValueType(0)); 1627 return true; 1628 } 1629 1630 // Handle 32-bit sext immediates with LIS + addr mode. 1631 if ((CN->getValueType(0) == MVT::i32 || 1632 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && 1633 (!Aligned || (CN->getZExtValue() & 3) == 0)) { 1634 int Addr = (int)CN->getZExtValue(); 1635 1636 // Otherwise, break this down into an LIS + disp. 1637 Disp = DAG.getTargetConstant((short)Addr, MVT::i32); 1638 1639 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); 1640 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; 1641 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); 1642 return true; 1643 } 1644 } 1645 1646 Disp = DAG.getTargetConstant(0, getPointerTy()); 1647 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { 1648 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1649 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1650 } else 1651 Base = N; 1652 return true; // [r+0] 1653 } 1654 1655 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be 1656 /// represented as an indexed [r+r] operation. 1657 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 1658 SDValue &Index, 1659 SelectionDAG &DAG) const { 1660 // Check to see if we can easily represent this as an [r+r] address. This 1661 // will fail if it thinks that the address is more profitably represented as 1662 // reg+imm, e.g. where imm = 0. 1663 if (SelectAddressRegReg(N, Base, Index, DAG)) 1664 return true; 1665 1666 // If the operand is an addition, always emit this as [r+r], since this is 1667 // better (for code size, and execution, as the memop does the add for free) 1668 // than emitting an explicit add. 1669 if (N.getOpcode() == ISD::ADD) { 1670 Base = N.getOperand(0); 1671 Index = N.getOperand(1); 1672 return true; 1673 } 1674 1675 // Otherwise, do it the hard way, using R0 as the base register. 1676 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1677 N.getValueType()); 1678 Index = N; 1679 return true; 1680 } 1681 1682 /// getPreIndexedAddressParts - returns true by value, base pointer and 1683 /// offset pointer and addressing mode by reference if the node's address 1684 /// can be legally represented as pre-indexed load / store address. 
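/// On PowerPC the update forms (e.g. lwzu/stwu, lfdu/stfdu) write the computed
/// effective address back into the base register, which is what ISD::PRE_INC
/// models.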
1685 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 1686 SDValue &Offset, 1687 ISD::MemIndexedMode &AM, 1688 SelectionDAG &DAG) const { 1689 if (DisablePPCPreinc) return false; 1690 1691 bool isLoad = true; 1692 SDValue Ptr; 1693 EVT VT; 1694 unsigned Alignment; 1695 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1696 Ptr = LD->getBasePtr(); 1697 VT = LD->getMemoryVT(); 1698 Alignment = LD->getAlignment(); 1699 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 1700 Ptr = ST->getBasePtr(); 1701 VT = ST->getMemoryVT(); 1702 Alignment = ST->getAlignment(); 1703 isLoad = false; 1704 } else 1705 return false; 1706 1707 // PowerPC doesn't have preinc load/store instructions for vectors (except 1708 // for QPX, which does have preinc r+r forms). 1709 if (VT.isVector()) { 1710 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { 1711 return false; 1712 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { 1713 AM = ISD::PRE_INC; 1714 return true; 1715 } 1716 } 1717 1718 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { 1719 1720 // Common code will reject creating a pre-inc form if the base pointer 1721 // is a frame index, or if N is a store and the base pointer is either 1722 // the same as or a predecessor of the value being stored. Check for 1723 // those situations here, and try with swapped Base/Offset instead. 1724 bool Swap = false; 1725 1726 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) 1727 Swap = true; 1728 else if (!isLoad) { 1729 SDValue Val = cast<StoreSDNode>(N)->getValue(); 1730 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) 1731 Swap = true; 1732 } 1733 1734 if (Swap) 1735 std::swap(Base, Offset); 1736 1737 AM = ISD::PRE_INC; 1738 return true; 1739 } 1740 1741 // LDU/STU can only handle immediates that are a multiple of 4. 1742 if (VT != MVT::i64) { 1743 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) 1744 return false; 1745 } else { 1746 // LDU/STU need an address with at least 4-byte alignment. 1747 if (Alignment < 4) 1748 return false; 1749 1750 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) 1751 return false; 1752 } 1753 1754 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1755 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 1756 // sext i32 to i64 when addr mode is r+i. 1757 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 1758 LD->getExtensionType() == ISD::SEXTLOAD && 1759 isa<ConstantSDNode>(Offset)) 1760 return false; 1761 } 1762 1763 AM = ISD::PRE_INC; 1764 return true; 1765 } 1766 1767 //===----------------------------------------------------------------------===// 1768 // LowerOperation implementation 1769 //===----------------------------------------------------------------------===// 1770 1771 /// GetLabelAccessInfo - Return true if we should reference labels using a 1772 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. 1773 static bool GetLabelAccessInfo(const TargetMachine &TM, 1774 const PPCSubtarget &Subtarget, 1775 unsigned &HiOpFlags, unsigned &LoOpFlags, 1776 const GlobalValue *GV = nullptr) { 1777 HiOpFlags = PPCII::MO_HA; 1778 LoOpFlags = PPCII::MO_LO; 1779 1780 // Don't use the pic base if not in PIC relocation model. 
1781 bool isPIC = TM.getRelocationModel() == Reloc::PIC_; 1782 1783 if (isPIC) { 1784 HiOpFlags |= PPCII::MO_PIC_FLAG; 1785 LoOpFlags |= PPCII::MO_PIC_FLAG; 1786 } 1787 1788 // If this is a reference to a global value that requires a non-lazy-ptr, make 1789 // sure that instruction lowering adds it. 1790 if (GV && Subtarget.hasLazyResolverStub(GV)) { 1791 HiOpFlags |= PPCII::MO_NLP_FLAG; 1792 LoOpFlags |= PPCII::MO_NLP_FLAG; 1793 1794 if (GV->hasHiddenVisibility()) { 1795 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1796 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1797 } 1798 } 1799 1800 return isPIC; 1801 } 1802 1803 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 1804 SelectionDAG &DAG) { 1805 EVT PtrVT = HiPart.getValueType(); 1806 SDValue Zero = DAG.getConstant(0, PtrVT); 1807 SDLoc DL(HiPart); 1808 1809 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 1810 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 1811 1812 // With PIC, the first instruction is actually "GR+hi(&G)". 1813 if (isPIC) 1814 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 1815 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 1816 1817 // Generate non-pic code that has direct accesses to the constant pool. 1818 // The address of the global is just (hi(&g)+lo(&g)). 1819 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 1820 } 1821 1822 static void setUsesTOCBasePtr(MachineFunction &MF) { 1823 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1824 FuncInfo->setUsesTOCBasePtr(); 1825 } 1826 1827 static void setUsesTOCBasePtr(SelectionDAG &DAG) { 1828 setUsesTOCBasePtr(DAG.getMachineFunction()); 1829 } 1830 1831 static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit, 1832 SDValue GA) { 1833 EVT VT = Is64Bit ? MVT::i64 : MVT::i32; 1834 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : 1835 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); 1836 1837 SDValue Ops[] = { GA, Reg }; 1838 return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl, 1839 DAG.getVTList(VT, MVT::Other), Ops, VT, 1840 MachinePointerInfo::getGOT(), 0, false, true, 1841 false, 0); 1842 } 1843 1844 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 1845 SelectionDAG &DAG) const { 1846 EVT PtrVT = Op.getValueType(); 1847 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1848 const Constant *C = CP->getConstVal(); 1849 1850 // 64-bit SVR4 ABI code is always position-independent. 1851 // The actual address of the GlobalValue is stored in the TOC. 1852 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1853 setUsesTOCBasePtr(DAG); 1854 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 1855 return getTOCEntry(DAG, SDLoc(CP), true, GA); 1856 } 1857 1858 unsigned MOHiFlag, MOLoFlag; 1859 bool isPIC = 1860 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag); 1861 1862 if (isPIC && Subtarget.isSVR4ABI()) { 1863 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 1864 PPCII::MO_PIC_FLAG); 1865 return getTOCEntry(DAG, SDLoc(CP), false, GA); 1866 } 1867 1868 SDValue CPIHi = 1869 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 1870 SDValue CPILo = 1871 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 1872 return LowerLabelRef(CPIHi, CPILo, isPIC, DAG); 1873 } 1874 1875 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 1876 EVT PtrVT = Op.getValueType(); 1877 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1878 1879 // 64-bit SVR4 ABI code is always position-independent. 
1880 // The actual address of the GlobalValue is stored in the TOC. 1881 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1882 setUsesTOCBasePtr(DAG); 1883 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 1884 return getTOCEntry(DAG, SDLoc(JT), true, GA); 1885 } 1886 1887 unsigned MOHiFlag, MOLoFlag; 1888 bool isPIC = 1889 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag); 1890 1891 if (isPIC && Subtarget.isSVR4ABI()) { 1892 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 1893 PPCII::MO_PIC_FLAG); 1894 return getTOCEntry(DAG, SDLoc(GA), false, GA); 1895 } 1896 1897 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 1898 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 1899 return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); 1900 } 1901 1902 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 1903 SelectionDAG &DAG) const { 1904 EVT PtrVT = Op.getValueType(); 1905 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); 1906 const BlockAddress *BA = BASDN->getBlockAddress(); 1907 1908 // 64-bit SVR4 ABI code is always position-independent. 1909 // The actual BlockAddress is stored in the TOC. 1910 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1911 setUsesTOCBasePtr(DAG); 1912 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); 1913 return getTOCEntry(DAG, SDLoc(BASDN), true, GA); 1914 } 1915 1916 unsigned MOHiFlag, MOLoFlag; 1917 bool isPIC = 1918 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag); 1919 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 1920 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 1921 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); 1922 } 1923 1924 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 1925 SelectionDAG &DAG) const { 1926 1927 // FIXME: TLS addresses currently use medium model code sequences, 1928 // which is the most useful form. Eventually support for small and 1929 // large models could be added if users need it, at the cost of 1930 // additional complexity. 1931 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 1932 SDLoc dl(GA); 1933 const GlobalValue *GV = GA->getGlobal(); 1934 EVT PtrVT = getPointerTy(); 1935 bool is64bit = Subtarget.isPPC64(); 1936 const Module *M = DAG.getMachineFunction().getFunction()->getParent(); 1937 PICLevel::Level picLevel = M->getPICLevel(); 1938 1939 TLSModel::Model Model = getTargetMachine().getTLSModel(GV); 1940 1941 if (Model == TLSModel::LocalExec) { 1942 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1943 PPCII::MO_TPREL_HA); 1944 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1945 PPCII::MO_TPREL_LO); 1946 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 1947 is64bit ? 
MVT::i64 : MVT::i32); 1948 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 1949 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 1950 } 1951 1952 if (Model == TLSModel::InitialExec) { 1953 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1954 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1955 PPCII::MO_TLS); 1956 SDValue GOTPtr; 1957 if (is64bit) { 1958 setUsesTOCBasePtr(DAG); 1959 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1960 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 1961 PtrVT, GOTReg, TGA); 1962 } else 1963 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); 1964 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 1965 PtrVT, TGA, GOTPtr); 1966 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 1967 } 1968 1969 if (Model == TLSModel::GeneralDynamic) { 1970 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1971 SDValue GOTPtr; 1972 if (is64bit) { 1973 setUsesTOCBasePtr(DAG); 1974 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1975 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 1976 GOTReg, TGA); 1977 } else { 1978 if (picLevel == PICLevel::Small) 1979 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 1980 else 1981 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 1982 } 1983 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, 1984 GOTPtr, TGA, TGA); 1985 } 1986 1987 if (Model == TLSModel::LocalDynamic) { 1988 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1989 SDValue GOTPtr; 1990 if (is64bit) { 1991 setUsesTOCBasePtr(DAG); 1992 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1993 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 1994 GOTReg, TGA); 1995 } else { 1996 if (picLevel == PICLevel::Small) 1997 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 1998 else 1999 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2000 } 2001 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, 2002 PtrVT, GOTPtr, TGA, TGA); 2003 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, 2004 PtrVT, TLSAddr, TGA); 2005 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 2006 } 2007 2008 llvm_unreachable("Unknown TLS model!"); 2009 } 2010 2011 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 2012 SelectionDAG &DAG) const { 2013 EVT PtrVT = Op.getValueType(); 2014 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 2015 SDLoc DL(GSDN); 2016 const GlobalValue *GV = GSDN->getGlobal(); 2017 2018 // 64-bit SVR4 ABI code is always position-independent. 2019 // The actual address of the GlobalValue is stored in the TOC. 
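  // getTOCEntry below materializes the address with a TOC-relative load
  // (PPCISD::TOC_ENTRY) instead of the hi/lo pair built by LowerLabelRef.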
2020 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2021 setUsesTOCBasePtr(DAG); 2022 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 2023 return getTOCEntry(DAG, DL, true, GA); 2024 } 2025 2026 unsigned MOHiFlag, MOLoFlag; 2027 bool isPIC = 2028 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV); 2029 2030 if (isPIC && Subtarget.isSVR4ABI()) { 2031 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 2032 GSDN->getOffset(), 2033 PPCII::MO_PIC_FLAG); 2034 return getTOCEntry(DAG, DL, false, GA); 2035 } 2036 2037 SDValue GAHi = 2038 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 2039 SDValue GALo = 2040 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 2041 2042 SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG); 2043 2044 // If the global reference is actually to a non-lazy-pointer, we have to do an 2045 // extra load to get the address of the global. 2046 if (MOHiFlag & PPCII::MO_NLP_FLAG) 2047 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), 2048 false, false, false, 0); 2049 return Ptr; 2050 } 2051 2052 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 2053 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2054 SDLoc dl(Op); 2055 2056 if (Op.getValueType() == MVT::v2i64) { 2057 // When the operands themselves are v2i64 values, we need to do something 2058 // special because VSX has no underlying comparison operations for these. 2059 if (Op.getOperand(0).getValueType() == MVT::v2i64) { 2060 // Equality can be handled by casting to the legal type for Altivec 2061 // comparisons, everything else needs to be expanded. 2062 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2063 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, 2064 DAG.getSetCC(dl, MVT::v4i32, 2065 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), 2066 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), 2067 CC)); 2068 } 2069 2070 return SDValue(); 2071 } 2072 2073 // We handle most of these in the usual way. 2074 return Op; 2075 } 2076 2077 // If we're comparing for equality to zero, expose the fact that this is 2078 // implented as a ctlz/srl pair on ppc, so that the dag combiner can 2079 // fold the new nodes. 2080 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 2081 if (C->isNullValue() && CC == ISD::SETEQ) { 2082 EVT VT = Op.getOperand(0).getValueType(); 2083 SDValue Zext = Op.getOperand(0); 2084 if (VT.bitsLT(MVT::i32)) { 2085 VT = MVT::i32; 2086 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); 2087 } 2088 unsigned Log2b = Log2_32(VT.getSizeInBits()); 2089 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); 2090 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, 2091 DAG.getConstant(Log2b, MVT::i32)); 2092 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); 2093 } 2094 // Leave comparisons against 0 and -1 alone for now, since they're usually 2095 // optimized. FIXME: revisit this when we can custom lower all setcc 2096 // optimizations. 2097 if (C->isAllOnesValue() || C->isNullValue()) 2098 return SDValue(); 2099 } 2100 2101 // If we have an integer seteq/setne, turn it into a compare against zero 2102 // by xor'ing the rhs with the lhs, which is faster than setting a 2103 // condition register, reading it back out, and masking the correct bit. The 2104 // normal approach here uses sub to do this instead of xor. Using xor exposes 2105 // the result to other bit-twiddling opportunities. 
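  // That is, (setcc eq a, b) becomes (setcc eq (xor a, b), 0), which the
  // equality-to-zero block above can then turn into the ctlz/srl sequence.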
2106 EVT LHSVT = Op.getOperand(0).getValueType(); 2107 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 2108 EVT VT = Op.getValueType(); 2109 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 2110 Op.getOperand(1)); 2111 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); 2112 } 2113 return SDValue(); 2114 } 2115 2116 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, 2117 const PPCSubtarget &Subtarget) const { 2118 SDNode *Node = Op.getNode(); 2119 EVT VT = Node->getValueType(0); 2120 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2121 SDValue InChain = Node->getOperand(0); 2122 SDValue VAListPtr = Node->getOperand(1); 2123 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2124 SDLoc dl(Node); 2125 2126 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 2127 2128 // gpr_index 2129 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2130 VAListPtr, MachinePointerInfo(SV), MVT::i8, 2131 false, false, false, 0); 2132 InChain = GprIndex.getValue(1); 2133 2134 if (VT == MVT::i64) { 2135 // Check if GprIndex is even 2136 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 2137 DAG.getConstant(1, MVT::i32)); 2138 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 2139 DAG.getConstant(0, MVT::i32), ISD::SETNE); 2140 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 2141 DAG.getConstant(1, MVT::i32)); 2142 // Align GprIndex to be even if it isn't 2143 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 2144 GprIndex); 2145 } 2146 2147 // fpr index is 1 byte after gpr 2148 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2149 DAG.getConstant(1, MVT::i32)); 2150 2151 // fpr 2152 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2153 FprPtr, MachinePointerInfo(SV), MVT::i8, 2154 false, false, false, 0); 2155 InChain = FprIndex.getValue(1); 2156 2157 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2158 DAG.getConstant(8, MVT::i32)); 2159 2160 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2161 DAG.getConstant(4, MVT::i32)); 2162 2163 // areas 2164 SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, 2165 MachinePointerInfo(), false, false, 2166 false, 0); 2167 InChain = OverflowArea.getValue(1); 2168 2169 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, 2170 MachinePointerInfo(), false, false, 2171 false, 0); 2172 InChain = RegSaveArea.getValue(1); 2173 2174 // select overflow_area if index > 8 2175 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 2176 DAG.getConstant(8, MVT::i32), ISD::SETLT); 2177 2178 // adjustment constant gpr_index * 4/8 2179 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 2180 VT.isInteger() ? GprIndex : FprIndex, 2181 DAG.getConstant(VT.isInteger() ? 4 : 8, 2182 MVT::i32)); 2183 2184 // OurReg = RegSaveArea + RegConstant 2185 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 2186 RegConstant); 2187 2188 // Floating types are 32 bytes into RegSaveArea 2189 if (VT.isFloatingPoint()) 2190 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 2191 DAG.getConstant(32, MVT::i32)); 2192 2193 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 2194 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 2195 VT.isInteger() ? GprIndex : FprIndex, 2196 DAG.getConstant(VT == MVT::i64 ? 
2 : 1, 2197 MVT::i32)); 2198 2199 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 2200 VT.isInteger() ? VAListPtr : FprPtr, 2201 MachinePointerInfo(SV), 2202 MVT::i8, false, false, 0); 2203 2204 // determine if we should load from reg_save_area or overflow_area 2205 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 2206 2207 // increase overflow_area by 4/8 if gpr/fpr > 8 2208 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 2209 DAG.getConstant(VT.isInteger() ? 4 : 8, 2210 MVT::i32)); 2211 2212 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 2213 OverflowAreaPlusN); 2214 2215 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, 2216 OverflowAreaPtr, 2217 MachinePointerInfo(), 2218 MVT::i32, false, false, 0); 2219 2220 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 2221 false, false, false, 0); 2222 } 2223 2224 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, 2225 const PPCSubtarget &Subtarget) const { 2226 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 2227 2228 // We have to copy the entire va_list struct: 2229 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 2230 return DAG.getMemcpy(Op.getOperand(0), Op, 2231 Op.getOperand(1), Op.getOperand(2), 2232 DAG.getConstant(12, MVT::i32), 8, false, true, 2233 MachinePointerInfo(), MachinePointerInfo()); 2234 } 2235 2236 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 2237 SelectionDAG &DAG) const { 2238 return Op.getOperand(0); 2239 } 2240 2241 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 2242 SelectionDAG &DAG) const { 2243 SDValue Chain = Op.getOperand(0); 2244 SDValue Trmp = Op.getOperand(1); // trampoline 2245 SDValue FPtr = Op.getOperand(2); // nested function 2246 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 2247 SDLoc dl(Op); 2248 2249 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2250 bool isPPC64 = (PtrVT == MVT::i64); 2251 Type *IntPtrTy = 2252 DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( 2253 *DAG.getContext()); 2254 2255 TargetLowering::ArgListTy Args; 2256 TargetLowering::ArgListEntry Entry; 2257 2258 Entry.Ty = IntPtrTy; 2259 Entry.Node = Trmp; Args.push_back(Entry); 2260 2261 // TrampSize == (isPPC64 ? 48 : 40); 2262 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, 2263 isPPC64 ? MVT::i64 : MVT::i32); 2264 Args.push_back(Entry); 2265 2266 Entry.Node = FPtr; Args.push_back(Entry); 2267 Entry.Node = Nest; Args.push_back(Entry); 2268 2269 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 2270 TargetLowering::CallLoweringInfo CLI(DAG); 2271 CLI.setDebugLoc(dl).setChain(Chain) 2272 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), 2273 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 2274 std::move(Args), 0); 2275 2276 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2277 return CallResult.second; 2278 } 2279 2280 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, 2281 const PPCSubtarget &Subtarget) const { 2282 MachineFunction &MF = DAG.getMachineFunction(); 2283 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2284 2285 SDLoc dl(Op); 2286 2287 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 2288 // vastart just stores the address of the VarArgsFrameIndex slot into the 2289 // memory location argument. 
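    // (Both of these ABIs define va_list as a simple pointer into the argument
    // area, so there is no gpr/fpr bookkeeping to update here.)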
2290 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2291 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2292 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2293 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2294 MachinePointerInfo(SV), 2295 false, false, 0); 2296 } 2297 2298 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 2299 // We suppose the given va_list is already allocated. 2300 // 2301 // typedef struct { 2302 // char gpr; /* index into the array of 8 GPRs 2303 // * stored in the register save area 2304 // * gpr=0 corresponds to r3, 2305 // * gpr=1 to r4, etc. 2306 // */ 2307 // char fpr; /* index into the array of 8 FPRs 2308 // * stored in the register save area 2309 // * fpr=0 corresponds to f1, 2310 // * fpr=1 to f2, etc. 2311 // */ 2312 // char *overflow_arg_area; 2313 // /* location on stack that holds 2314 // * the next overflow argument 2315 // */ 2316 // char *reg_save_area; 2317 // /* where r3:r10 and f1:f8 (if saved) 2318 // * are stored 2319 // */ 2320 // } va_list[1]; 2321 2322 2323 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); 2324 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); 2325 2326 2327 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2328 2329 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 2330 PtrVT); 2331 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2332 PtrVT); 2333 2334 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 2335 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); 2336 2337 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 2338 SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); 2339 2340 uint64_t FPROffset = 1; 2341 SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); 2342 2343 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2344 2345 // Store first byte : number of int regs 2346 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, 2347 Op.getOperand(1), 2348 MachinePointerInfo(SV), 2349 MVT::i8, false, false, 0); 2350 uint64_t nextOffset = FPROffset; 2351 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 2352 ConstFPROffset); 2353 2354 // Store second byte : number of float regs 2355 SDValue secondStore = 2356 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 2357 MachinePointerInfo(SV, nextOffset), MVT::i8, 2358 false, false, 0); 2359 nextOffset += StackOffset; 2360 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 2361 2362 // Store second word : arguments given on stack 2363 SDValue thirdStore = 2364 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 2365 MachinePointerInfo(SV, nextOffset), 2366 false, false, 0); 2367 nextOffset += FrameOffset; 2368 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 2369 2370 // Store third word : arguments given in registers 2371 return DAG.getStore(thirdStore, dl, FR, nextPtr, 2372 MachinePointerInfo(SV, nextOffset), 2373 false, false, 0); 2374 2375 } 2376 2377 #include "PPCGenCallingConv.inc" 2378 2379 // Function whose sole purpose is to kill compiler warnings 2380 // stemming from unused functions included from PPCGenCallingConv.inc. 2381 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { 2382 return Flag ? 
CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; 2383 } 2384 2385 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 2386 CCValAssign::LocInfo &LocInfo, 2387 ISD::ArgFlagsTy &ArgFlags, 2388 CCState &State) { 2389 return true; 2390 } 2391 2392 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 2393 MVT &LocVT, 2394 CCValAssign::LocInfo &LocInfo, 2395 ISD::ArgFlagsTy &ArgFlags, 2396 CCState &State) { 2397 static const MCPhysReg ArgRegs[] = { 2398 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2399 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2400 }; 2401 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2402 2403 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2404 2405 // Skip one register if the first unallocated register has an even register 2406 // number and there are still argument registers available which have not been 2407 // allocated yet. RegNum is actually an index into ArgRegs, which means we 2408 // need to skip a register if RegNum is odd. 2409 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 2410 State.AllocateReg(ArgRegs[RegNum]); 2411 } 2412 2413 // Always return false here, as this function only makes sure that the first 2414 // unallocated register has an odd register number and does not actually 2415 // allocate a register for the current argument. 2416 return false; 2417 } 2418 2419 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 2420 MVT &LocVT, 2421 CCValAssign::LocInfo &LocInfo, 2422 ISD::ArgFlagsTy &ArgFlags, 2423 CCState &State) { 2424 static const MCPhysReg ArgRegs[] = { 2425 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2426 PPC::F8 2427 }; 2428 2429 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2430 2431 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2432 2433 // If there is only one Floating-point register left we need to put both f64 2434 // values of a split ppc_fp128 value on the stack. 2435 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 2436 State.AllocateReg(ArgRegs[RegNum]); 2437 } 2438 2439 // Always return false here, as this function only makes sure that the two f64 2440 // values a ppc_fp128 value is split into are both passed in registers or both 2441 // passed on the stack and does not actually allocate a register for the 2442 // current argument. 2443 return false; 2444 } 2445 2446 /// FPR - The set of FP registers that should be allocated for arguments, 2447 /// on Darwin. 2448 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, 2449 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, 2450 PPC::F11, PPC::F12, PPC::F13}; 2451 2452 /// QFPR - The set of QPX registers that should be allocated for arguments. 2453 static const MCPhysReg QFPR[] = { 2454 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, 2455 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; 2456 2457 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 2458 /// the stack. 2459 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2460 unsigned PtrByteSize) { 2461 unsigned ArgSize = ArgVT.getStoreSize(); 2462 if (Flags.isByVal()) 2463 ArgSize = Flags.getByValSize(); 2464 2465 // Round up to multiples of the pointer size, except for array members, 2466 // which are always packed. 
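  // For example, a 5-byte by-value struct still occupies a full 8-byte
  // doubleword of the PPC64 parameter save area.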
2467 if (!Flags.isInConsecutiveRegs()) 2468 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2469 2470 return ArgSize; 2471 } 2472 2473 /// CalculateStackSlotAlignment - Calculates the alignment of this argument 2474 /// on the stack. 2475 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 2476 ISD::ArgFlagsTy Flags, 2477 unsigned PtrByteSize) { 2478 unsigned Align = PtrByteSize; 2479 2480 // Altivec parameters are padded to a 16 byte boundary. 2481 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2482 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2483 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) 2484 Align = 16; 2485 // QPX vector types stored in double-precision are padded to a 32 byte 2486 // boundary. 2487 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) 2488 Align = 32; 2489 2490 // ByVal parameters are aligned as requested. 2491 if (Flags.isByVal()) { 2492 unsigned BVAlign = Flags.getByValAlign(); 2493 if (BVAlign > PtrByteSize) { 2494 if (BVAlign % PtrByteSize != 0) 2495 llvm_unreachable( 2496 "ByVal alignment is not a multiple of the pointer size"); 2497 2498 Align = BVAlign; 2499 } 2500 } 2501 2502 // Array members are always packed to their original alignment. 2503 if (Flags.isInConsecutiveRegs()) { 2504 // If the array member was split into multiple registers, the first 2505 // needs to be aligned to the size of the full type. (Except for 2506 // ppcf128, which is only aligned as its f64 components.) 2507 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 2508 Align = OrigVT.getStoreSize(); 2509 else 2510 Align = ArgVT.getStoreSize(); 2511 } 2512 2513 return Align; 2514 } 2515 2516 /// CalculateStackSlotUsed - Return whether this argument will use its 2517 /// stack slot (instead of being passed in registers). ArgOffset, 2518 /// AvailableFPRs, and AvailableVRs must hold the current argument 2519 /// position, and will be updated to account for this argument. 2520 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 2521 ISD::ArgFlagsTy Flags, 2522 unsigned PtrByteSize, 2523 unsigned LinkageSize, 2524 unsigned ParamAreaSize, 2525 unsigned &ArgOffset, 2526 unsigned &AvailableFPRs, 2527 unsigned &AvailableVRs, bool HasQPX) { 2528 bool UseMemory = false; 2529 2530 // Respect alignment of argument on the stack. 2531 unsigned Align = 2532 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 2533 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2534 // If there's no space left in the argument save area, we must 2535 // use memory (this check also catches zero-sized arguments). 2536 if (ArgOffset >= LinkageSize + ParamAreaSize) 2537 UseMemory = true; 2538 2539 // Allocate argument on the stack. 2540 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2541 if (Flags.isInConsecutiveRegsLast()) 2542 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2543 // If we overran the argument save area, we must use memory 2544 // (this check catches arguments passed partially in memory) 2545 if (ArgOffset > LinkageSize + ParamAreaSize) 2546 UseMemory = true; 2547 2548 // However, if the argument is actually passed in an FPR or a VR, 2549 // we don't use memory after all. 2550 if (!Flags.isByVal()) { 2551 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || 2552 // QPX registers overlap with the scalar FP registers. 
2553 (HasQPX && (ArgVT == MVT::v4f32 || 2554 ArgVT == MVT::v4f64 || 2555 ArgVT == MVT::v4i1))) 2556 if (AvailableFPRs > 0) { 2557 --AvailableFPRs; 2558 return false; 2559 } 2560 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2561 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2562 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) 2563 if (AvailableVRs > 0) { 2564 --AvailableVRs; 2565 return false; 2566 } 2567 } 2568 2569 return UseMemory; 2570 } 2571 2572 /// EnsureStackAlignment - Round stack frame size up from NumBytes to 2573 /// ensure minimum alignment required for target. 2574 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, 2575 unsigned NumBytes) { 2576 unsigned TargetAlign = Lowering->getStackAlignment(); 2577 unsigned AlignMask = TargetAlign - 1; 2578 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2579 return NumBytes; 2580 } 2581 2582 SDValue 2583 PPCTargetLowering::LowerFormalArguments(SDValue Chain, 2584 CallingConv::ID CallConv, bool isVarArg, 2585 const SmallVectorImpl<ISD::InputArg> 2586 &Ins, 2587 SDLoc dl, SelectionDAG &DAG, 2588 SmallVectorImpl<SDValue> &InVals) 2589 const { 2590 if (Subtarget.isSVR4ABI()) { 2591 if (Subtarget.isPPC64()) 2592 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 2593 dl, DAG, InVals); 2594 else 2595 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 2596 dl, DAG, InVals); 2597 } else { 2598 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 2599 dl, DAG, InVals); 2600 } 2601 } 2602 2603 SDValue 2604 PPCTargetLowering::LowerFormalArguments_32SVR4( 2605 SDValue Chain, 2606 CallingConv::ID CallConv, bool isVarArg, 2607 const SmallVectorImpl<ISD::InputArg> 2608 &Ins, 2609 SDLoc dl, SelectionDAG &DAG, 2610 SmallVectorImpl<SDValue> &InVals) const { 2611 2612 // 32-bit SVR4 ABI Stack Frame Layout: 2613 // +-----------------------------------+ 2614 // +--> | Back chain | 2615 // | +-----------------------------------+ 2616 // | | Floating-point register save area | 2617 // | +-----------------------------------+ 2618 // | | General register save area | 2619 // | +-----------------------------------+ 2620 // | | CR save word | 2621 // | +-----------------------------------+ 2622 // | | VRSAVE save word | 2623 // | +-----------------------------------+ 2624 // | | Alignment padding | 2625 // | +-----------------------------------+ 2626 // | | Vector register save area | 2627 // | +-----------------------------------+ 2628 // | | Local variable space | 2629 // | +-----------------------------------+ 2630 // | | Parameter list area | 2631 // | +-----------------------------------+ 2632 // | | LR save word | 2633 // | +-----------------------------------+ 2634 // SP--> +--- | Back chain | 2635 // +-----------------------------------+ 2636 // 2637 // Specifications: 2638 // System V Application Binary Interface PowerPC Processor Supplement 2639 // AltiVec Technology Programming Interface Manual 2640 2641 MachineFunction &MF = DAG.getMachineFunction(); 2642 MachineFrameInfo *MFI = MF.getFrameInfo(); 2643 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2644 2645 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2646 // Potential tail calls could cause overwriting of argument stack slots. 2647 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2648 (CallConv == CallingConv::Fast)); 2649 unsigned PtrByteSize = 4; 2650 2651 // Assign locations to all of the incoming arguments. 
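  // The allocation rules themselves (CC_PPC32_SVR4) are table-generated into
  // PPCGenCallingConv.inc; by-value aggregates get a second pass further down.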
2652 SmallVector<CCValAssign, 16> ArgLocs; 2653 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 2654 *DAG.getContext()); 2655 2656 // Reserve space for the linkage area on the stack. 2657 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 2658 CCInfo.AllocateStack(LinkageSize, PtrByteSize); 2659 2660 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 2661 2662 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2663 CCValAssign &VA = ArgLocs[i]; 2664 2665 // Arguments stored in registers. 2666 if (VA.isRegLoc()) { 2667 const TargetRegisterClass *RC; 2668 EVT ValVT = VA.getValVT(); 2669 2670 switch (ValVT.getSimpleVT().SimpleTy) { 2671 default: 2672 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 2673 case MVT::i1: 2674 case MVT::i32: 2675 RC = &PPC::GPRCRegClass; 2676 break; 2677 case MVT::f32: 2678 RC = &PPC::F4RCRegClass; 2679 break; 2680 case MVT::f64: 2681 if (Subtarget.hasVSX()) 2682 RC = &PPC::VSFRCRegClass; 2683 else 2684 RC = &PPC::F8RCRegClass; 2685 break; 2686 case MVT::v16i8: 2687 case MVT::v8i16: 2688 case MVT::v4i32: 2689 RC = &PPC::VRRCRegClass; 2690 break; 2691 case MVT::v4f32: 2692 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; 2693 break; 2694 case MVT::v2f64: 2695 case MVT::v2i64: 2696 RC = &PPC::VSHRCRegClass; 2697 break; 2698 case MVT::v4f64: 2699 RC = &PPC::QFRCRegClass; 2700 break; 2701 case MVT::v4i1: 2702 RC = &PPC::QBRCRegClass; 2703 break; 2704 } 2705 2706 // Transform the arguments stored in physical registers into virtual ones. 2707 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2708 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, 2709 ValVT == MVT::i1 ? MVT::i32 : ValVT); 2710 2711 if (ValVT == MVT::i1) 2712 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); 2713 2714 InVals.push_back(ArgValue); 2715 } else { 2716 // Argument stored in memory. 2717 assert(VA.isMemLoc()); 2718 2719 unsigned ArgSize = VA.getLocVT().getStoreSize(); 2720 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 2721 isImmutable); 2722 2723 // Create load nodes to retrieve arguments from the stack. 2724 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2725 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2726 MachinePointerInfo(), 2727 false, false, false, 0)); 2728 } 2729 } 2730 2731 // Assign locations to all of the incoming aggregate by value arguments. 2732 // Aggregates passed by value are stored in the local variable space of the 2733 // caller's stack frame, right above the parameter list area. 2734 SmallVector<CCValAssign, 16> ByValArgLocs; 2735 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2736 ByValArgLocs, *DAG.getContext()); 2737 2738 // Reserve stack space for the allocations in CCInfo. 2739 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 2740 2741 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 2742 2743 // Area that is at least reserved in the caller of this function. 2744 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 2745 MinReservedArea = std::max(MinReservedArea, LinkageSize); 2746 2747 // Set the size that is at least reserved in caller of this function. Tail 2748 // call optimized function's reserved stack space needs to be aligned so that 2749 // taking the difference between two stack areas will result in an aligned 2750 // stack. 
2751 MinReservedArea = 2752 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 2753 FuncInfo->setMinReservedArea(MinReservedArea); 2754 2755 SmallVector<SDValue, 8> MemOps; 2756 2757 // If the function takes variable number of arguments, make a frame index for 2758 // the start of the first vararg value... for expansion of llvm.va_start. 2759 if (isVarArg) { 2760 static const MCPhysReg GPArgRegs[] = { 2761 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2762 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2763 }; 2764 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 2765 2766 static const MCPhysReg FPArgRegs[] = { 2767 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2768 PPC::F8 2769 }; 2770 unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 2771 if (DisablePPCFloatInVariadic) 2772 NumFPArgRegs = 0; 2773 2774 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); 2775 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); 2776 2777 // Make room for NumGPArgRegs and NumFPArgRegs. 2778 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 2779 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; 2780 2781 FuncInfo->setVarArgsStackOffset( 2782 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2783 CCInfo.getNextStackOffset(), true)); 2784 2785 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 2786 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2787 2788 // The fixed integer arguments of a variadic function are stored to the 2789 // VarArgsFrameIndex on the stack so that they may be loaded by deferencing 2790 // the result of va_next. 2791 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 2792 // Get an existing live-in vreg, or add a new one. 2793 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 2794 if (!VReg) 2795 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 2796 2797 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2798 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2799 MachinePointerInfo(), false, false, 0); 2800 MemOps.push_back(Store); 2801 // Increment the address by four for the next argument to store 2802 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2803 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2804 } 2805 2806 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 2807 // is set. 2808 // The double arguments are stored to the VarArgsFrameIndex 2809 // on the stack. 2810 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 2811 // Get an existing live-in vreg, or add a new one. 2812 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 2813 if (!VReg) 2814 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 2815 2816 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 2817 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2818 MachinePointerInfo(), false, false, 0); 2819 MemOps.push_back(Store); 2820 // Increment the address by eight for the next argument to store 2821 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, 2822 PtrVT); 2823 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2824 } 2825 } 2826 2827 if (!MemOps.empty()) 2828 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 2829 2830 return Chain; 2831 } 2832 2833 // PPC64 passes i8, i16, and i32 values in i64 registers. 
Promote 2834 // value to MVT::i64 and then truncate to the correct register size. 2835 SDValue 2836 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, 2837 SelectionDAG &DAG, SDValue ArgVal, 2838 SDLoc dl) const { 2839 if (Flags.isSExt()) 2840 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 2841 DAG.getValueType(ObjectVT)); 2842 else if (Flags.isZExt()) 2843 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 2844 DAG.getValueType(ObjectVT)); 2845 2846 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); 2847 } 2848 2849 SDValue 2850 PPCTargetLowering::LowerFormalArguments_64SVR4( 2851 SDValue Chain, 2852 CallingConv::ID CallConv, bool isVarArg, 2853 const SmallVectorImpl<ISD::InputArg> 2854 &Ins, 2855 SDLoc dl, SelectionDAG &DAG, 2856 SmallVectorImpl<SDValue> &InVals) const { 2857 // TODO: add description of PPC stack frame format, or at least some docs. 2858 // 2859 bool isELFv2ABI = Subtarget.isELFv2ABI(); 2860 bool isLittleEndian = Subtarget.isLittleEndian(); 2861 MachineFunction &MF = DAG.getMachineFunction(); 2862 MachineFrameInfo *MFI = MF.getFrameInfo(); 2863 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2864 2865 assert(!(CallConv == CallingConv::Fast && isVarArg) && 2866 "fastcc not supported on varargs functions"); 2867 2868 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2869 // Potential tail calls could cause overwriting of argument stack slots. 2870 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2871 (CallConv == CallingConv::Fast)); 2872 unsigned PtrByteSize = 8; 2873 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 2874 2875 static const MCPhysReg GPR[] = { 2876 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2877 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2878 }; 2879 static const MCPhysReg VR[] = { 2880 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2881 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2882 }; 2883 static const MCPhysReg VSRH[] = { 2884 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 2885 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 2886 }; 2887 2888 const unsigned Num_GPR_Regs = array_lengthof(GPR); 2889 const unsigned Num_FPR_Regs = 13; 2890 const unsigned Num_VR_Regs = array_lengthof(VR); 2891 const unsigned Num_QFPR_Regs = Num_FPR_Regs; 2892 2893 // Do a first pass over the arguments to determine whether the ABI 2894 // guarantees that our caller has allocated the parameter save area 2895 // on its stack frame. In the ELFv1 ABI, this is always the case; 2896 // in the ELFv2 ABI, it is true if this is a vararg function or if 2897 // any parameter is located in a stack slot. 2898 2899 bool HasParameterArea = !isELFv2ABI || isVarArg; 2900 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; 2901 unsigned NumBytes = LinkageSize; 2902 unsigned AvailableFPRs = Num_FPR_Regs; 2903 unsigned AvailableVRs = Num_VR_Regs; 2904 for (unsigned i = 0, e = Ins.size(); i != e; ++i) 2905 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, 2906 PtrByteSize, LinkageSize, ParamAreaSize, 2907 NumBytes, AvailableFPRs, AvailableVRs, 2908 Subtarget.hasQPX())) 2909 HasParameterArea = true; 2910 2911 // Add DAG nodes to load the arguments or copy them out of registers. On 2912 // entry to a function on PPC, the arguments start after the linkage area, 2913 // although the first ones are often in registers. 
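  // Rough sketch of the caller-owned area this code walks (informal; the ABI
  // documents are authoritative):
  //   [SP + 0           , SP + LinkageSize)  linkage area
  //   [SP + LinkageSize , ...             )  parameter save area, one
  //                                          doubleword slot shadowing each of
  //                                          X3-X10, then memory arguments
  // ArgOffset starts at LinkageSize and advances through the save area.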
2914 2915 unsigned ArgOffset = LinkageSize; 2916 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2917 unsigned &QFPR_idx = FPR_idx; 2918 SmallVector<SDValue, 8> MemOps; 2919 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2920 unsigned CurArgIdx = 0; 2921 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2922 SDValue ArgVal; 2923 bool needsLoad = false; 2924 EVT ObjectVT = Ins[ArgNo].VT; 2925 EVT OrigVT = Ins[ArgNo].ArgVT; 2926 unsigned ObjSize = ObjectVT.getStoreSize(); 2927 unsigned ArgSize = ObjSize; 2928 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2929 if (Ins[ArgNo].isOrigArg()) { 2930 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 2931 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 2932 } 2933 // We re-align the argument offset for each argument, except when using the 2934 // fast calling convention, when we need to make sure we do that only when 2935 // we'll actually use a stack slot. 2936 unsigned CurArgOffset, Align; 2937 auto ComputeArgOffset = [&]() { 2938 /* Respect alignment of argument on the stack. */ 2939 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); 2940 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2941 CurArgOffset = ArgOffset; 2942 }; 2943 2944 if (CallConv != CallingConv::Fast) { 2945 ComputeArgOffset(); 2946 2947 /* Compute GPR index associated with argument offset. */ 2948 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 2949 GPR_idx = std::min(GPR_idx, Num_GPR_Regs); 2950 } 2951 2952 // FIXME the codegen can be much improved in some cases. 2953 // We do not have to keep everything in memory. 2954 if (Flags.isByVal()) { 2955 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 2956 2957 if (CallConv == CallingConv::Fast) 2958 ComputeArgOffset(); 2959 2960 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2961 ObjSize = Flags.getByValSize(); 2962 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2963 // Empty aggregate parameters do not take up registers. Examples: 2964 // struct { } a; 2965 // union { } b; 2966 // int c[0]; 2967 // etc. However, we have to provide a place-holder in InVals, so 2968 // pretend we have an 8-byte item at the current address for that 2969 // purpose. 2970 if (!ObjSize) { 2971 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2972 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2973 InVals.push_back(FIN); 2974 continue; 2975 } 2976 2977 // Create a stack object covering all stack doublewords occupied 2978 // by the argument. If the argument is (fully or partially) on 2979 // the stack, or if the argument is fully in registers but the 2980 // caller has allocated the parameter save anyway, we can refer 2981 // directly to the caller's stack frame. Otherwise, create a 2982 // local copy in our own frame. 2983 int FI; 2984 if (HasParameterArea || 2985 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) 2986 FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true); 2987 else 2988 FI = MFI->CreateStackObject(ArgSize, Align, false); 2989 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2990 2991 // Handle aggregates smaller than 8 bytes. 2992 if (ObjSize < PtrByteSize) { 2993 // The value of the object is its address, which differs from the 2994 // address of the enclosing doubleword on big-endian systems. 
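      // Informal example: a 3-byte aggregate occupies a single 8-byte slot. On
      // big-endian targets it is right-justified (its first byte at slot offset
      // 5, hence the PtrByteSize - ObjSize adjustment below); on little-endian
      // targets it starts at offset 0 and needs no adjustment.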
2995 SDValue Arg = FIN; 2996 if (!isLittleEndian) { 2997 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT); 2998 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); 2999 } 3000 InVals.push_back(Arg); 3001 3002 if (GPR_idx != Num_GPR_Regs) { 3003 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3004 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3005 SDValue Store; 3006 3007 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 3008 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 3009 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 3010 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, 3011 MachinePointerInfo(FuncArg), 3012 ObjType, false, false, 0); 3013 } else { 3014 // For sizes that don't fit a truncating store (3, 5, 6, 7), 3015 // store the whole register as-is to the parameter save area 3016 // slot. 3017 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3018 MachinePointerInfo(FuncArg), 3019 false, false, 0); 3020 } 3021 3022 MemOps.push_back(Store); 3023 } 3024 // Whether we copied from a register or not, advance the offset 3025 // into the parameter save area by a full doubleword. 3026 ArgOffset += PtrByteSize; 3027 continue; 3028 } 3029 3030 // The value of the object is its address, which is the address of 3031 // its first stack doubleword. 3032 InVals.push_back(FIN); 3033 3034 // Store whatever pieces of the object are in registers to memory. 3035 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3036 if (GPR_idx == Num_GPR_Regs) 3037 break; 3038 3039 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3040 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3041 SDValue Addr = FIN; 3042 if (j) { 3043 SDValue Off = DAG.getConstant(j, PtrVT); 3044 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); 3045 } 3046 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, 3047 MachinePointerInfo(FuncArg, j), 3048 false, false, 0); 3049 MemOps.push_back(Store); 3050 ++GPR_idx; 3051 } 3052 ArgOffset += ArgSize; 3053 continue; 3054 } 3055 3056 switch (ObjectVT.getSimpleVT().SimpleTy) { 3057 default: llvm_unreachable("Unhandled argument type!"); 3058 case MVT::i1: 3059 case MVT::i32: 3060 case MVT::i64: 3061 // These can be scalar arguments or elements of an integer array type 3062 // passed directly. Clang may use those instead of "byval" aggregate 3063 // types to avoid forcing arguments to memory unnecessarily. 3064 if (GPR_idx != Num_GPR_Regs) { 3065 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3066 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3067 3068 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3069 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3070 // value to MVT::i64 and then truncate to the correct register size. 3071 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3072 } else { 3073 if (CallConv == CallingConv::Fast) 3074 ComputeArgOffset(); 3075 3076 needsLoad = true; 3077 ArgSize = PtrByteSize; 3078 } 3079 if (CallConv != CallingConv::Fast || needsLoad) 3080 ArgOffset += 8; 3081 break; 3082 3083 case MVT::f32: 3084 case MVT::f64: 3085 // These can be scalar arguments or elements of a float array type 3086 // passed directly. The latter are used to implement ELFv2 homogenous 3087 // float aggregates. 3088 if (FPR_idx != Num_FPR_Regs) { 3089 unsigned VReg; 3090 3091 if (ObjectVT == MVT::f32) 3092 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 3093 else 3094 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() 3095 ? 
&PPC::VSFRCRegClass 3096 : &PPC::F8RCRegClass); 3097 3098 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3099 ++FPR_idx; 3100 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { 3101 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 3102 // once we support fp <-> gpr moves. 3103 3104 // This can only ever happen in the presence of f32 array types, 3105 // since otherwise we never run out of FPRs before running out 3106 // of GPRs. 3107 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3108 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3109 3110 if (ObjectVT == MVT::f32) { 3111 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) 3112 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, 3113 DAG.getConstant(32, MVT::i32)); 3114 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 3115 } 3116 3117 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); 3118 } else { 3119 if (CallConv == CallingConv::Fast) 3120 ComputeArgOffset(); 3121 3122 needsLoad = true; 3123 } 3124 3125 // When passing an array of floats, the array occupies consecutive 3126 // space in the argument area; only round up to the next doubleword 3127 // at the end of the array. Otherwise, each float takes 8 bytes. 3128 if (CallConv != CallingConv::Fast || needsLoad) { 3129 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; 3130 ArgOffset += ArgSize; 3131 if (Flags.isInConsecutiveRegsLast()) 3132 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3133 } 3134 break; 3135 case MVT::v4f32: 3136 case MVT::v4i32: 3137 case MVT::v8i16: 3138 case MVT::v16i8: 3139 case MVT::v2f64: 3140 case MVT::v2i64: 3141 if (!Subtarget.hasQPX()) { 3142 // These can be scalar arguments or elements of a vector array type 3143 // passed directly. The latter are used to implement ELFv2 homogenous 3144 // vector aggregates. 3145 if (VR_idx != Num_VR_Regs) { 3146 unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? 3147 MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : 3148 MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3149 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3150 ++VR_idx; 3151 } else { 3152 if (CallConv == CallingConv::Fast) 3153 ComputeArgOffset(); 3154 3155 needsLoad = true; 3156 } 3157 if (CallConv != CallingConv::Fast || needsLoad) 3158 ArgOffset += 16; 3159 break; 3160 } // not QPX 3161 3162 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && 3163 "Invalid QPX parameter type"); 3164 /* fall through */ 3165 3166 case MVT::v4f64: 3167 case MVT::v4i1: 3168 // QPX vectors are treated like their scalar floating-point subregisters 3169 // (except that they're larger). 3170 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 
16 : 32; 3171 if (QFPR_idx != Num_QFPR_Regs) { 3172 const TargetRegisterClass *RC; 3173 switch (ObjectVT.getSimpleVT().SimpleTy) { 3174 case MVT::v4f64: RC = &PPC::QFRCRegClass; break; 3175 case MVT::v4f32: RC = &PPC::QSRCRegClass; break; 3176 default: RC = &PPC::QBRCRegClass; break; 3177 } 3178 3179 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); 3180 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3181 ++QFPR_idx; 3182 } else { 3183 if (CallConv == CallingConv::Fast) 3184 ComputeArgOffset(); 3185 needsLoad = true; 3186 } 3187 if (CallConv != CallingConv::Fast || needsLoad) 3188 ArgOffset += Sz; 3189 break; 3190 } 3191 3192 // We need to load the argument to a virtual register if we determined 3193 // above that we ran out of physical registers of the appropriate type. 3194 if (needsLoad) { 3195 if (ObjSize < ArgSize && !isLittleEndian) 3196 CurArgOffset += ArgSize - ObjSize; 3197 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); 3198 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3199 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 3200 false, false, false, 0); 3201 } 3202 3203 InVals.push_back(ArgVal); 3204 } 3205 3206 // Area that is at least reserved in the caller of this function. 3207 unsigned MinReservedArea; 3208 if (HasParameterArea) 3209 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); 3210 else 3211 MinReservedArea = LinkageSize; 3212 3213 // Set the size that is at least reserved in caller of this function. Tail 3214 // call optimized functions' reserved stack space needs to be aligned so that 3215 // taking the difference between two stack areas will result in an aligned 3216 // stack. 3217 MinReservedArea = 3218 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3219 FuncInfo->setMinReservedArea(MinReservedArea); 3220 3221 // If the function takes variable number of arguments, make a frame index for 3222 // the start of the first vararg value... for expansion of llvm.va_start. 3223 if (isVarArg) { 3224 int Depth = ArgOffset; 3225 3226 FuncInfo->setVarArgsFrameIndex( 3227 MFI->CreateFixedObject(PtrByteSize, Depth, true)); 3228 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3229 3230 // If this function is vararg, store any remaining integer argument regs 3231 // to their spots on the stack so that they may be loaded by deferencing the 3232 // result of va_next. 3233 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3234 GPR_idx < Num_GPR_Regs; ++GPR_idx) { 3235 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3236 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3237 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3238 MachinePointerInfo(), false, false, 0); 3239 MemOps.push_back(Store); 3240 // Increment the address by four for the next argument to store 3241 SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT); 3242 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3243 } 3244 } 3245 3246 if (!MemOps.empty()) 3247 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3248 3249 return Chain; 3250 } 3251 3252 SDValue 3253 PPCTargetLowering::LowerFormalArguments_Darwin( 3254 SDValue Chain, 3255 CallingConv::ID CallConv, bool isVarArg, 3256 const SmallVectorImpl<ISD::InputArg> 3257 &Ins, 3258 SDLoc dl, SelectionDAG &DAG, 3259 SmallVectorImpl<SDValue> &InVals) const { 3260 // TODO: add description of PPC stack frame format, or at least some docs. 
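  // (Informal sketch pending proper docs: arguments are laid out after the
  // linkage area, and in 32-bit non-varargs functions vector arguments occupy
  // a separate area after all non-vector arguments, located via VecArgOffset
  // below.)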
3261 // 3262 MachineFunction &MF = DAG.getMachineFunction(); 3263 MachineFrameInfo *MFI = MF.getFrameInfo(); 3264 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3265 3266 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3267 bool isPPC64 = PtrVT == MVT::i64; 3268 // Potential tail calls could cause overwriting of argument stack slots. 3269 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3270 (CallConv == CallingConv::Fast)); 3271 unsigned PtrByteSize = isPPC64 ? 8 : 4; 3272 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3273 unsigned ArgOffset = LinkageSize; 3274 // Area that is at least reserved in caller of this function. 3275 unsigned MinReservedArea = ArgOffset; 3276 3277 static const MCPhysReg GPR_32[] = { // 32-bit registers. 3278 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3279 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3280 }; 3281 static const MCPhysReg GPR_64[] = { // 64-bit registers. 3282 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3283 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3284 }; 3285 static const MCPhysReg VR[] = { 3286 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3287 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3288 }; 3289 3290 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 3291 const unsigned Num_FPR_Regs = 13; 3292 const unsigned Num_VR_Regs = array_lengthof( VR); 3293 3294 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3295 3296 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 3297 3298 // In 32-bit non-varargs functions, the stack space for vectors is after the 3299 // stack space for non-vectors. We do not use this space unless we have 3300 // too many vectors to fit in registers, something that only occurs in 3301 // constructed examples:), but we have to walk the arglist to figure 3302 // that out...for the pathological case, compute VecArgOffset as the 3303 // start of the vector parameter area. Computing VecArgOffset is the 3304 // entire point of the following loop. 3305 unsigned VecArgOffset = ArgOffset; 3306 if (!isVarArg && !isPPC64) { 3307 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 3308 ++ArgNo) { 3309 EVT ObjectVT = Ins[ArgNo].VT; 3310 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3311 3312 if (Flags.isByVal()) { 3313 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 3314 unsigned ObjSize = Flags.getByValSize(); 3315 unsigned ArgSize = 3316 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3317 VecArgOffset += ArgSize; 3318 continue; 3319 } 3320 3321 switch(ObjectVT.getSimpleVT().SimpleTy) { 3322 default: llvm_unreachable("Unhandled argument type!"); 3323 case MVT::i1: 3324 case MVT::i32: 3325 case MVT::f32: 3326 VecArgOffset += 4; 3327 break; 3328 case MVT::i64: // PPC64 3329 case MVT::f64: 3330 // FIXME: We are guaranteed to be !isPPC64 at this point. 3331 // Does MVT::i64 apply? 3332 VecArgOffset += 8; 3333 break; 3334 case MVT::v4f32: 3335 case MVT::v4i32: 3336 case MVT::v8i16: 3337 case MVT::v16i8: 3338 // Nothing to do, we're only looking at Nonvector args here. 3339 break; 3340 } 3341 } 3342 } 3343 // We've found where the vector parameter area in memory is. Skip the 3344 // first 12 parameters; these don't use that memory. 3345 VecArgOffset = ((VecArgOffset+15)/16)*16; 3346 VecArgOffset += 12*16; 3347 3348 // Add DAG nodes to load the arguments or copy them out of registers. On 3349 // entry to a function on PPC, the arguments start after the linkage area, 3350 // although the first ones are often in registers. 
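  // Note (informal summary of the loop below): in the Darwin ABI every integer
  // and floating-point argument reserves parameter-area space whether or not it
  // arrived in a register, and floating-point arguments also consume the GPRs
  // that shadow their portion of the parameter area.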
3351 3352 SmallVector<SDValue, 8> MemOps; 3353 unsigned nAltivecParamsAtEnd = 0; 3354 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3355 unsigned CurArgIdx = 0; 3356 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3357 SDValue ArgVal; 3358 bool needsLoad = false; 3359 EVT ObjectVT = Ins[ArgNo].VT; 3360 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 3361 unsigned ArgSize = ObjSize; 3362 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3363 if (Ins[ArgNo].isOrigArg()) { 3364 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3365 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3366 } 3367 unsigned CurArgOffset = ArgOffset; 3368 3369 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 3370 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 3371 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 3372 if (isVarArg || isPPC64) { 3373 MinReservedArea = ((MinReservedArea+15)/16)*16; 3374 MinReservedArea += CalculateStackSlotSize(ObjectVT, 3375 Flags, 3376 PtrByteSize); 3377 } else nAltivecParamsAtEnd++; 3378 } else 3379 // Calculate min reserved area. 3380 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 3381 Flags, 3382 PtrByteSize); 3383 3384 // FIXME the codegen can be much improved in some cases. 3385 // We do not have to keep everything in memory. 3386 if (Flags.isByVal()) { 3387 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3388 3389 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3390 ObjSize = Flags.getByValSize(); 3391 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3392 // Objects of size 1 and 2 are right justified, everything else is 3393 // left justified. This means the memory address is adjusted forwards. 3394 if (ObjSize==1 || ObjSize==2) { 3395 CurArgOffset = CurArgOffset + (4 - ObjSize); 3396 } 3397 // The value of the object is its address. 3398 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); 3399 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3400 InVals.push_back(FIN); 3401 if (ObjSize==1 || ObjSize==2) { 3402 if (GPR_idx != Num_GPR_Regs) { 3403 unsigned VReg; 3404 if (isPPC64) 3405 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3406 else 3407 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3408 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3409 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 3410 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 3411 MachinePointerInfo(FuncArg), 3412 ObjType, false, false, 0); 3413 MemOps.push_back(Store); 3414 ++GPR_idx; 3415 } 3416 3417 ArgOffset += PtrByteSize; 3418 3419 continue; 3420 } 3421 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3422 // Store whatever pieces of the object are in registers 3423 // to memory. ArgOffset will be the address of the beginning 3424 // of the object. 
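      // Informal example: a 12-byte byval aggregate on 32-bit Darwin
      // (PtrByteSize == 4) has ArgSize == 12, so up to three GPRs are written
      // back to consecutive 4-byte stack slots by this loop.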
3425 if (GPR_idx != Num_GPR_Regs) { 3426 unsigned VReg; 3427 if (isPPC64) 3428 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3429 else 3430 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3431 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 3432 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3433 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3434 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3435 MachinePointerInfo(FuncArg, j), 3436 false, false, 0); 3437 MemOps.push_back(Store); 3438 ++GPR_idx; 3439 ArgOffset += PtrByteSize; 3440 } else { 3441 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 3442 break; 3443 } 3444 } 3445 continue; 3446 } 3447 3448 switch (ObjectVT.getSimpleVT().SimpleTy) { 3449 default: llvm_unreachable("Unhandled argument type!"); 3450 case MVT::i1: 3451 case MVT::i32: 3452 if (!isPPC64) { 3453 if (GPR_idx != Num_GPR_Regs) { 3454 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3455 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 3456 3457 if (ObjectVT == MVT::i1) 3458 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 3459 3460 ++GPR_idx; 3461 } else { 3462 needsLoad = true; 3463 ArgSize = PtrByteSize; 3464 } 3465 // All int arguments reserve stack space in the Darwin ABI. 3466 ArgOffset += PtrByteSize; 3467 break; 3468 } 3469 // FALLTHROUGH 3470 case MVT::i64: // PPC64 3471 if (GPR_idx != Num_GPR_Regs) { 3472 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3473 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3474 3475 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3476 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3477 // value to MVT::i64 and then truncate to the correct register size. 3478 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3479 3480 ++GPR_idx; 3481 } else { 3482 needsLoad = true; 3483 ArgSize = PtrByteSize; 3484 } 3485 // All int arguments reserve stack space in the Darwin ABI. 3486 ArgOffset += 8; 3487 break; 3488 3489 case MVT::f32: 3490 case MVT::f64: 3491 // Every 4 bytes of argument space consumes one of the GPRs available for 3492 // argument passing. 3493 if (GPR_idx != Num_GPR_Regs) { 3494 ++GPR_idx; 3495 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 3496 ++GPR_idx; 3497 } 3498 if (FPR_idx != Num_FPR_Regs) { 3499 unsigned VReg; 3500 3501 if (ObjectVT == MVT::f32) 3502 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 3503 else 3504 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 3505 3506 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3507 ++FPR_idx; 3508 } else { 3509 needsLoad = true; 3510 } 3511 3512 // All FP arguments reserve stack space in the Darwin ABI. 3513 ArgOffset += isPPC64 ? 8 : ObjSize; 3514 break; 3515 case MVT::v4f32: 3516 case MVT::v4i32: 3517 case MVT::v8i16: 3518 case MVT::v16i8: 3519 // Note that vector arguments in registers don't reserve stack space, 3520 // except in varargs functions. 3521 if (VR_idx != Num_VR_Regs) { 3522 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3523 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3524 if (isVarArg) { 3525 while ((ArgOffset % 16) != 0) { 3526 ArgOffset += PtrByteSize; 3527 if (GPR_idx != Num_GPR_Regs) 3528 GPR_idx++; 3529 } 3530 ArgOffset += 16; 3531 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 3532 } 3533 ++VR_idx; 3534 } else { 3535 if (!isVarArg && !isPPC64) { 3536 // Vectors go after all the nonvectors. 
3537 CurArgOffset = VecArgOffset; 3538 VecArgOffset += 16; 3539 } else { 3540 // Vectors are aligned. 3541 ArgOffset = ((ArgOffset+15)/16)*16; 3542 CurArgOffset = ArgOffset; 3543 ArgOffset += 16; 3544 } 3545 needsLoad = true; 3546 } 3547 break; 3548 } 3549 3550 // We need to load the argument to a virtual register if we determined above 3551 // that we ran out of physical registers of the appropriate type. 3552 if (needsLoad) { 3553 int FI = MFI->CreateFixedObject(ObjSize, 3554 CurArgOffset + (ArgSize - ObjSize), 3555 isImmutable); 3556 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3557 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 3558 false, false, false, 0); 3559 } 3560 3561 InVals.push_back(ArgVal); 3562 } 3563 3564 // Allow for Altivec parameters at the end, if needed. 3565 if (nAltivecParamsAtEnd) { 3566 MinReservedArea = ((MinReservedArea+15)/16)*16; 3567 MinReservedArea += 16*nAltivecParamsAtEnd; 3568 } 3569 3570 // Area that is at least reserved in the caller of this function. 3571 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); 3572 3573 // Set the size that is at least reserved in caller of this function. Tail 3574 // call optimized functions' reserved stack space needs to be aligned so that 3575 // taking the difference between two stack areas will result in an aligned 3576 // stack. 3577 MinReservedArea = 3578 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3579 FuncInfo->setMinReservedArea(MinReservedArea); 3580 3581 // If the function takes variable number of arguments, make a frame index for 3582 // the start of the first vararg value... for expansion of llvm.va_start. 3583 if (isVarArg) { 3584 int Depth = ArgOffset; 3585 3586 FuncInfo->setVarArgsFrameIndex( 3587 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 3588 Depth, true)); 3589 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3590 3591 // If this function is vararg, store any remaining integer argument regs 3592 // to their spots on the stack so that they may be loaded by deferencing the 3593 // result of va_next. 3594 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 3595 unsigned VReg; 3596 3597 if (isPPC64) 3598 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3599 else 3600 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3601 3602 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3603 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3604 MachinePointerInfo(), false, false, 0); 3605 MemOps.push_back(Store); 3606 // Increment the address by four for the next argument to store 3607 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 3608 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3609 } 3610 } 3611 3612 if (!MemOps.empty()) 3613 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3614 3615 return Chain; 3616 } 3617 3618 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 3619 /// adjusted to accommodate the arguments for the tailcall. 3620 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 3621 unsigned ParamSize) { 3622 3623 if (!isTailCall) return 0; 3624 3625 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 3626 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 3627 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 3628 // Remember only if the new adjustement is bigger. 
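  // Informal example: if the caller reserved 64 bytes but this tail call needs
  // 96 bytes of argument space, SPDiff is -32; the smallest (most negative)
  // value seen is remembered as the required adjustment.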
3629 if (SPDiff < FI->getTailCallSPDelta()) 3630 FI->setTailCallSPDelta(SPDiff); 3631 3632 return SPDiff; 3633 } 3634 3635 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 3636 /// for tail call optimization. Targets which want to do tail call 3637 /// optimization should implement this function. 3638 bool 3639 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 3640 CallingConv::ID CalleeCC, 3641 bool isVarArg, 3642 const SmallVectorImpl<ISD::InputArg> &Ins, 3643 SelectionDAG& DAG) const { 3644 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 3645 return false; 3646 3647 // Variable argument functions are not supported. 3648 if (isVarArg) 3649 return false; 3650 3651 MachineFunction &MF = DAG.getMachineFunction(); 3652 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 3653 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 3654 // Functions containing by val parameters are not supported. 3655 for (unsigned i = 0; i != Ins.size(); i++) { 3656 ISD::ArgFlagsTy Flags = Ins[i].Flags; 3657 if (Flags.isByVal()) return false; 3658 } 3659 3660 // Non-PIC/GOT tail calls are supported. 3661 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 3662 return true; 3663 3664 // At the moment we can only do local tail calls (in same module, hidden 3665 // or protected) if we are generating PIC. 3666 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3667 return G->getGlobal()->hasHiddenVisibility() 3668 || G->getGlobal()->hasProtectedVisibility(); 3669 } 3670 3671 return false; 3672 } 3673 3674 /// isCallCompatibleAddress - Return the immediate to use if the specified 3675 /// 32-bit value is representable in the immediate field of a BxA instruction. 3676 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 3677 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 3678 if (!C) return nullptr; 3679 3680 int Addr = C->getZExtValue(); 3681 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 3682 SignExtend32<26>(Addr) != Addr) 3683 return nullptr; // Top 6 bits have to be sext of immediate. 3684 3685 return DAG.getConstant((int)C->getZExtValue() >> 2, 3686 DAG.getTargetLoweringInfo().getPointerTy()).getNode(); 3687 } 3688 3689 namespace { 3690 3691 struct TailCallArgumentInfo { 3692 SDValue Arg; 3693 SDValue FrameIdxOp; 3694 int FrameIdx; 3695 3696 TailCallArgumentInfo() : FrameIdx(0) {} 3697 }; 3698 3699 } 3700 3701 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 3702 static void 3703 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 3704 SDValue Chain, 3705 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 3706 SmallVectorImpl<SDValue> &MemOpChains, 3707 SDLoc dl) { 3708 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 3709 SDValue Arg = TailCallArgs[i].Arg; 3710 SDValue FIN = TailCallArgs[i].FrameIdxOp; 3711 int FI = TailCallArgs[i].FrameIdx; 3712 // Store relative to framepointer. 3713 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, 3714 MachinePointerInfo::getFixedStack(FI), 3715 false, false, 0)); 3716 } 3717 } 3718 3719 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 3720 /// the appropriate stack slot for the tail call optimized function call. 
3721 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 3722 MachineFunction &MF, 3723 SDValue Chain, 3724 SDValue OldRetAddr, 3725 SDValue OldFP, 3726 int SPDiff, 3727 bool isPPC64, 3728 bool isDarwinABI, 3729 SDLoc dl) { 3730 if (SPDiff) { 3731 // Calculate the new stack slot for the return address. 3732 int SlotSize = isPPC64 ? 8 : 4; 3733 const PPCFrameLowering *FL = 3734 MF.getSubtarget<PPCSubtarget>().getFrameLowering(); 3735 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); 3736 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 3737 NewRetAddrLoc, true); 3738 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 3739 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 3740 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 3741 MachinePointerInfo::getFixedStack(NewRetAddr), 3742 false, false, 0); 3743 3744 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 3745 // slot as the FP is never overwritten. 3746 if (isDarwinABI) { 3747 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); 3748 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 3749 true); 3750 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 3751 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 3752 MachinePointerInfo::getFixedStack(NewFPIdx), 3753 false, false, 0); 3754 } 3755 } 3756 return Chain; 3757 } 3758 3759 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 3760 /// the position of the argument. 3761 static void 3762 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 3763 SDValue Arg, int SPDiff, unsigned ArgOffset, 3764 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 3765 int Offset = ArgOffset + SPDiff; 3766 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 3767 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 3768 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 3769 SDValue FIN = DAG.getFrameIndex(FI, VT); 3770 TailCallArgumentInfo Info; 3771 Info.Arg = Arg; 3772 Info.FrameIdxOp = FIN; 3773 Info.FrameIdx = FI; 3774 TailCallArguments.push_back(Info); 3775 } 3776 3777 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 3778 /// stack slot. Returns the chain as result and the loaded frame pointers in 3779 /// LROpOut/FPOpout. Used when tail calling. 3780 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 3781 int SPDiff, 3782 SDValue Chain, 3783 SDValue &LROpOut, 3784 SDValue &FPOpOut, 3785 bool isDarwinABI, 3786 SDLoc dl) const { 3787 if (SPDiff) { 3788 // Load the LR and FP stack slot for later adjusting. 3789 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 3790 LROpOut = getReturnAddrFrameIndex(DAG); 3791 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 3792 false, false, false, 0); 3793 Chain = SDValue(LROpOut.getNode(), 1); 3794 3795 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 3796 // slot as the FP is never overwritten. 3797 if (isDarwinABI) { 3798 FPOpOut = getFramePointerFrameIndex(DAG); 3799 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 3800 false, false, false, 0); 3801 Chain = SDValue(FPOpOut.getNode(), 1); 3802 } 3803 } 3804 return Chain; 3805 } 3806 3807 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 3808 /// by "Src" to address "Dst" of size "Size". Alignment information is 3809 /// specified by the specific parameter attribute. 
The copy will be passed as 3810 /// a byval function parameter. 3811 /// Sometimes what we are copying is the end of a larger object, the part that 3812 /// does not fit in registers. 3813 static SDValue 3814 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 3815 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 3816 SDLoc dl) { 3817 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 3818 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 3819 false, false, MachinePointerInfo(), 3820 MachinePointerInfo()); 3821 } 3822 3823 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 3824 /// tail calls. 3825 static void 3826 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 3827 SDValue Arg, SDValue PtrOff, int SPDiff, 3828 unsigned ArgOffset, bool isPPC64, bool isTailCall, 3829 bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 3830 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, 3831 SDLoc dl) { 3832 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3833 if (!isTailCall) { 3834 if (isVector) { 3835 SDValue StackPtr; 3836 if (isPPC64) 3837 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3838 else 3839 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3840 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 3841 DAG.getConstant(ArgOffset, PtrVT)); 3842 } 3843 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3844 MachinePointerInfo(), false, false, 0)); 3845 // Calculate and remember argument location. 3846 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 3847 TailCallArguments); 3848 } 3849 3850 static 3851 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 3852 SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 3853 SDValue LROp, SDValue FPOp, bool isDarwinABI, 3854 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 3855 MachineFunction &MF = DAG.getMachineFunction(); 3856 3857 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 3858 // might overwrite each other in case of tail call optimization. 3859 SmallVector<SDValue, 8> MemOpChains2; 3860 // Do not flag preceding copytoreg stuff together with the following stuff. 3861 InFlag = SDValue(); 3862 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 3863 MemOpChains2, dl); 3864 if (!MemOpChains2.empty()) 3865 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 3866 3867 // Store the return address to the appropriate stack slot. 3868 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 3869 isPPC64, isDarwinABI, dl); 3870 3871 // Emit callseq_end just before tailcall node. 3872 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3873 DAG.getIntPtrConstant(0, true), InFlag, dl); 3874 InFlag = Chain.getValue(1); 3875 } 3876 3877 // Is this global address that of a function that can be called by name? (as 3878 // opposed to something that must hold a descriptor for an indirect call). 
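// (TLS global addresses are rejected below because a call through a TLS
// address is really an indirect call through a thread-specific pointer; see
// PrepareCall.)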
3879 static bool isFunctionGlobalAddress(SDValue Callee) { 3880 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3881 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 3882 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 3883 return false; 3884 3885 return G->getGlobal()->getType()->getElementType()->isFunctionTy(); 3886 } 3887 3888 return false; 3889 } 3890 3891 static 3892 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 3893 SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff, 3894 bool isTailCall, bool IsPatchPoint, 3895 SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, 3896 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 3897 ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { 3898 3899 bool isPPC64 = Subtarget.isPPC64(); 3900 bool isSVR4ABI = Subtarget.isSVR4ABI(); 3901 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3902 3903 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3904 NodeTys.push_back(MVT::Other); // Returns a chain 3905 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 3906 3907 unsigned CallOpc = PPCISD::CALL; 3908 3909 bool needIndirectCall = true; 3910 if (!isSVR4ABI || !isPPC64) 3911 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 3912 // If this is an absolute destination address, use the munged value. 3913 Callee = SDValue(Dest, 0); 3914 needIndirectCall = false; 3915 } 3916 3917 if (isFunctionGlobalAddress(Callee)) { 3918 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 3919 // A call to a TLS address is actually an indirect call to a 3920 // thread-specific pointer. 3921 unsigned OpFlags = 0; 3922 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 3923 (Subtarget.getTargetTriple().isMacOSX() && 3924 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 3925 (G->getGlobal()->isDeclaration() || 3926 G->getGlobal()->isWeakForLinker())) || 3927 (Subtarget.isTargetELF() && !isPPC64 && 3928 !G->getGlobal()->hasLocalLinkage() && 3929 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3930 // PC-relative references to external symbols should go through $stub, 3931 // unless we're building with the leopard linker or later, which 3932 // automatically synthesizes these stubs. 3933 OpFlags = PPCII::MO_PLT_OR_STUB; 3934 } 3935 3936 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 3937 // every direct call is) turn it into a TargetGlobalAddress / 3938 // TargetExternalSymbol node so that legalize doesn't hack it. 3939 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 3940 Callee.getValueType(), 0, OpFlags); 3941 needIndirectCall = false; 3942 } 3943 3944 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3945 unsigned char OpFlags = 0; 3946 3947 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 3948 (Subtarget.getTargetTriple().isMacOSX() && 3949 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) || 3950 (Subtarget.isTargetELF() && !isPPC64 && 3951 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3952 // PC-relative references to external symbols should go through $stub, 3953 // unless we're building with the leopard linker or later, which 3954 // automatically synthesizes these stubs. 
3955 OpFlags = PPCII::MO_PLT_OR_STUB; 3956 } 3957 3958 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 3959 OpFlags); 3960 needIndirectCall = false; 3961 } 3962 3963 if (IsPatchPoint) { 3964 // We'll form an invalid direct call when lowering a patchpoint; the full 3965 // sequence for an indirect call is complicated, and many of the 3966 // instructions introduced might have side effects (and, thus, can't be 3967 // removed later). The call itself will be removed as soon as the 3968 // argument/return lowering is complete, so the fact that it has the wrong 3969 // kind of operands should not really matter. 3970 needIndirectCall = false; 3971 } 3972 3973 if (needIndirectCall) { 3974 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 3975 // to do the call, we can't use PPCISD::CALL. 3976 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 3977 3978 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 3979 // Function pointers in the 64-bit SVR4 ABI do not point to the function 3980 // entry point, but to the function descriptor (the function entry point 3981 // address is part of the function descriptor though). 3982 // The function descriptor is a three doubleword structure with the 3983 // following fields: function entry point, TOC base address and 3984 // environment pointer. 3985 // Thus for a call through a function pointer, the following actions need 3986 // to be performed: 3987 // 1. Save the TOC of the caller in the TOC save area of its stack 3988 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 3989 // 2. Load the address of the function entry point from the function 3990 // descriptor. 3991 // 3. Load the TOC of the callee from the function descriptor into r2. 3992 // 4. Load the environment pointer from the function descriptor into 3993 // r11. 3994 // 5. Branch to the function entry point address. 3995 // 6. On return of the callee, the TOC of the caller needs to be 3996 // restored (this is done in FinishCall()). 3997 // 3998 // The loads are scheduled at the beginning of the call sequence, and the 3999 // register copies are flagged together to ensure that no other 4000 // operations can be scheduled in between. E.g. without flagging the 4001 // copies together, a TOC access in the caller could be scheduled between 4002 // the assignment of the callee TOC and the branch to the callee, which 4003 // results in the TOC access going through the TOC of the callee instead 4004 // of going through the TOC of the caller, which leads to incorrect code. 4005 4006 // Load the address of the function entry point from the function 4007 // descriptor. 4008 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); 4009 if (LDChain.getValueType() == MVT::Glue) 4010 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); 4011 4012 bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors(); 4013 4014 MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr); 4015 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, 4016 false, false, LoadsInv, 8); 4017 4018 // Load environment pointer into r11. 
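      // Informal layout of the descriptor dereferenced here (ELFv1):
      //   Callee + 0  : function entry point (moved into CTR below)
      //   Callee + 8  : TOC base             (copied into X2)
      //   Callee + 16 : environment pointer  (copied into X11)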
4019 SDValue PtrOff = DAG.getIntPtrConstant(16); 4020 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 4021 SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, 4022 MPI.getWithOffset(16), false, false, 4023 LoadsInv, 8); 4024 4025 SDValue TOCOff = DAG.getIntPtrConstant(8); 4026 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 4027 SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, 4028 MPI.getWithOffset(8), false, false, 4029 LoadsInv, 8); 4030 4031 setUsesTOCBasePtr(DAG); 4032 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, 4033 InFlag); 4034 Chain = TOCVal.getValue(0); 4035 InFlag = TOCVal.getValue(1); 4036 4037 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 4038 InFlag); 4039 4040 Chain = EnvVal.getValue(0); 4041 InFlag = EnvVal.getValue(1); 4042 4043 MTCTROps[0] = Chain; 4044 MTCTROps[1] = LoadFuncPtr; 4045 MTCTROps[2] = InFlag; 4046 } 4047 4048 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 4049 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 4050 InFlag = Chain.getValue(1); 4051 4052 NodeTys.clear(); 4053 NodeTys.push_back(MVT::Other); 4054 NodeTys.push_back(MVT::Glue); 4055 Ops.push_back(Chain); 4056 CallOpc = PPCISD::BCTRL; 4057 Callee.setNode(nullptr); 4058 // Add use of X11 (holding environment pointer) 4059 if (isSVR4ABI && isPPC64 && !isELFv2ABI) 4060 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 4061 // Add CTR register as callee so a bctr can be emitted later. 4062 if (isTailCall) 4063 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 4064 } 4065 4066 // If this is a direct call, pass the chain and the callee. 4067 if (Callee.getNode()) { 4068 Ops.push_back(Chain); 4069 Ops.push_back(Callee); 4070 } 4071 // If this is a tail call add stack pointer delta. 4072 if (isTailCall) 4073 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 4074 4075 // Add argument registers to the end of the list so that they are known live 4076 // into the call. 4077 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 4078 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 4079 RegsToPass[i].second.getValueType())); 4080 4081 // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live 4082 // into the call. 4083 if (isSVR4ABI && isPPC64 && !IsPatchPoint) { 4084 setUsesTOCBasePtr(DAG); 4085 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 4086 } 4087 4088 return CallOpc; 4089 } 4090 4091 static 4092 bool isLocalCall(const SDValue &Callee) 4093 { 4094 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4095 return !G->getGlobal()->isDeclaration() && 4096 !G->getGlobal()->isWeakForLinker(); 4097 return false; 4098 } 4099 4100 SDValue 4101 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 4102 CallingConv::ID CallConv, bool isVarArg, 4103 const SmallVectorImpl<ISD::InputArg> &Ins, 4104 SDLoc dl, SelectionDAG &DAG, 4105 SmallVectorImpl<SDValue> &InVals) const { 4106 4107 SmallVector<CCValAssign, 16> RVLocs; 4108 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 4109 *DAG.getContext()); 4110 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 4111 4112 // Copy all of the result registers out of their specified physreg. 
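  // For example (informal), an i8 result returned in a GPR arrives here with a
  // wider LocVT; the switch below re-asserts the recorded extension and then
  // truncates back down to the declared ValVT.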
4113 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 4114 CCValAssign &VA = RVLocs[i]; 4115 assert(VA.isRegLoc() && "Can only return in registers!"); 4116 4117 SDValue Val = DAG.getCopyFromReg(Chain, dl, 4118 VA.getLocReg(), VA.getLocVT(), InFlag); 4119 Chain = Val.getValue(1); 4120 InFlag = Val.getValue(2); 4121 4122 switch (VA.getLocInfo()) { 4123 default: llvm_unreachable("Unknown loc info!"); 4124 case CCValAssign::Full: break; 4125 case CCValAssign::AExt: 4126 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4127 break; 4128 case CCValAssign::ZExt: 4129 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 4130 DAG.getValueType(VA.getValVT())); 4131 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4132 break; 4133 case CCValAssign::SExt: 4134 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 4135 DAG.getValueType(VA.getValVT())); 4136 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4137 break; 4138 } 4139 4140 InVals.push_back(Val); 4141 } 4142 4143 return Chain; 4144 } 4145 4146 SDValue 4147 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, 4148 bool isTailCall, bool isVarArg, bool IsPatchPoint, 4149 SelectionDAG &DAG, 4150 SmallVector<std::pair<unsigned, SDValue>, 8> 4151 &RegsToPass, 4152 SDValue InFlag, SDValue Chain, 4153 SDValue CallSeqStart, SDValue &Callee, 4154 int SPDiff, unsigned NumBytes, 4155 const SmallVectorImpl<ISD::InputArg> &Ins, 4156 SmallVectorImpl<SDValue> &InVals, 4157 ImmutableCallSite *CS) const { 4158 4159 std::vector<EVT> NodeTys; 4160 SmallVector<SDValue, 8> Ops; 4161 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, 4162 SPDiff, isTailCall, IsPatchPoint, RegsToPass, 4163 Ops, NodeTys, CS, Subtarget); 4164 4165 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 4166 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 4167 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 4168 4169 // When performing tail call optimization the callee pops its arguments off 4170 // the stack. Account for this here so these bytes can be pushed back on in 4171 // PPCFrameLowering::eliminateCallFramePseudoInstr. 4172 int BytesCalleePops = 4173 (CallConv == CallingConv::Fast && 4174 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 4175 4176 // Add a register mask operand representing the call-preserved registers. 4177 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4178 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 4179 assert(Mask && "Missing call preserved mask for calling convention"); 4180 Ops.push_back(DAG.getRegisterMask(Mask)); 4181 4182 if (InFlag.getNode()) 4183 Ops.push_back(InFlag); 4184 4185 // Emit tail call. 4186 if (isTailCall) { 4187 assert(((Callee.getOpcode() == ISD::Register && 4188 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 4189 Callee.getOpcode() == ISD::TargetExternalSymbol || 4190 Callee.getOpcode() == ISD::TargetGlobalAddress || 4191 isa<ConstantSDNode>(Callee)) && 4192 "Expecting an global address, external symbol, absolute value or register"); 4193 4194 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); 4195 } 4196 4197 // Add a NOP immediately after the branch instruction when using the 64-bit 4198 // SVR4 ABI. At link time, if caller and callee are in a different module and 4199 // thus have a different TOC, the call will be replaced with a call to a stub 4200 // function which saves the current TOC, loads the TOC of the callee and 4201 // branches to the callee. 
The NOP will be replaced with a load instruction 4202 // which restores the TOC of the caller from the TOC save slot of the current 4203 // stack frame. If caller and callee belong to the same module (and have the 4204 // same TOC), the NOP will remain unchanged. 4205 4206 if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && 4207 !IsPatchPoint) { 4208 if (CallOpc == PPCISD::BCTRL) { 4209 // This is a call through a function pointer. 4210 // Restore the caller TOC from the save area into R2. 4211 // See PrepareCall() for more information about calls through function 4212 // pointers in the 64-bit SVR4 ABI. 4213 // We are using a target-specific load with r2 hard coded, because the 4214 // result of a target-independent load would never go directly into r2, 4215 // since r2 is a reserved register (which prevents the register allocator 4216 // from allocating it), resulting in an additional register being 4217 // allocated and an unnecessary move instruction being generated. 4218 CallOpc = PPCISD::BCTRL_LOAD_TOC; 4219 4220 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4221 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); 4222 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 4223 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); 4224 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); 4225 4226 // The address needs to go after the chain input but before the flag (or 4227 // any other variadic arguments). 4228 Ops.insert(std::next(Ops.begin()), AddTOC); 4229 } else if ((CallOpc == PPCISD::CALL) && 4230 (!isLocalCall(Callee) || 4231 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) 4232 // Otherwise insert NOP for non-local calls. 4233 CallOpc = PPCISD::CALL_NOP; 4234 } 4235 4236 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 4237 InFlag = Chain.getValue(1); 4238 4239 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 4240 DAG.getIntPtrConstant(BytesCalleePops, true), 4241 InFlag, dl); 4242 if (!Ins.empty()) 4243 InFlag = Chain.getValue(1); 4244 4245 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 4246 Ins, dl, DAG, InVals); 4247 } 4248 4249 SDValue 4250 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 4251 SmallVectorImpl<SDValue> &InVals) const { 4252 SelectionDAG &DAG = CLI.DAG; 4253 SDLoc &dl = CLI.DL; 4254 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4255 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4256 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4257 SDValue Chain = CLI.Chain; 4258 SDValue Callee = CLI.Callee; 4259 bool &isTailCall = CLI.IsTailCall; 4260 CallingConv::ID CallConv = CLI.CallConv; 4261 bool isVarArg = CLI.IsVarArg; 4262 bool IsPatchPoint = CLI.IsPatchPoint; 4263 ImmutableCallSite *CS = CLI.CS; 4264 4265 if (isTailCall) 4266 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 4267 Ins, DAG); 4268 4269 if (!isTailCall && CS && CS->isMustTailCall()) 4270 report_fatal_error("failed to perform tail call elimination on a call " 4271 "site marked musttail"); 4272 4273 if (Subtarget.isSVR4ABI()) { 4274 if (Subtarget.isPPC64()) 4275 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 4276 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 4277 dl, DAG, InVals, CS); 4278 else 4279 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 4280 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 4281 dl, DAG, InVals, CS); 4282 } 4283 4284 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 4285 isTailCall, 
IsPatchPoint, Outs, OutVals, Ins,
4286                           dl, DAG, InVals, CS);
4287 }
4288
4289 SDValue
4290 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
4291                                     CallingConv::ID CallConv, bool isVarArg,
4292                                     bool isTailCall, bool IsPatchPoint,
4293                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
4294                                     const SmallVectorImpl<SDValue> &OutVals,
4295                                     const SmallVectorImpl<ISD::InputArg> &Ins,
4296                                     SDLoc dl, SelectionDAG &DAG,
4297                                     SmallVectorImpl<SDValue> &InVals,
4298                                     ImmutableCallSite *CS) const {
4299   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
4300   // of the 32-bit SVR4 ABI stack frame layout.
4301
4302   assert((CallConv == CallingConv::C ||
4303           CallConv == CallingConv::Fast) && "Unknown calling convention!");
4304
4305   unsigned PtrByteSize = 4;
4306
4307   MachineFunction &MF = DAG.getMachineFunction();
4308
4309   // Mark this function as potentially containing a call that can be tail-call
4310   // optimized. As a consequence, the frame pointer will be used for dynamic
4311   // stack allocation and for restoring the caller's stack pointer in this
4312   // function's epilogue, because the tail-called function might overwrite the
4313   // value in this function's (MF) stack pointer stack slot 0(SP).
4314   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4315       CallConv == CallingConv::Fast)
4316     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4317
4318   // Count how many bytes are to be pushed on the stack, including the linkage
4319   // area, parameter list area and the part of the local variable space which
4320   // contains copies of aggregates which are passed by value.
4321
4322   // Assign locations to all of the outgoing arguments.
4323   SmallVector<CCValAssign, 16> ArgLocs;
4324   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4325                  *DAG.getContext());
4326
4327   // Reserve space for the linkage area on the stack.
4328   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
4329                        PtrByteSize);
4330
4331   if (isVarArg) {
4332     // Handle fixed and variable vector arguments differently.
4333     // Fixed vector arguments go into registers as long as registers are
4334     // available. Variable vector arguments always go into memory.
4335     unsigned NumArgs = Outs.size();
4336
4337     for (unsigned i = 0; i != NumArgs; ++i) {
4338       MVT ArgVT = Outs[i].VT;
4339       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4340       bool Result;
4341
4342       if (Outs[i].IsFixed) {
4343         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
4344                                CCInfo);
4345       } else {
4346         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
4347                                       ArgFlags, CCInfo);
4348       }
4349
4350       if (Result) {
4351 #ifndef NDEBUG
4352         errs() << "Call operand #" << i << " has unhandled type "
4353                << EVT(ArgVT).getEVTString() << "\n";
4354 #endif
4355         llvm_unreachable(nullptr);
4356       }
4357     }
4358   } else {
4359     // All arguments are treated the same.
4360     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
4361   }
4362
4363   // Assign locations to all of the outgoing aggregate by value arguments.
4364   SmallVector<CCValAssign, 16> ByValArgLocs;
4365   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4366                       ByValArgLocs, *DAG.getContext());
4367
4368   // Reserve stack space for the allocations in CCInfo.
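  // Because CCByValInfo starts allocating at CCInfo.getNextStackOffset(), the
  // by-value copies are laid out directly after the ordinary parameter list
  // area reserved above.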
4369 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 4370 4371 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 4372 4373 // Size of the linkage area, parameter list area and the part of the local 4374 // space variable where copies of aggregates which are passed by value are 4375 // stored. 4376 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 4377 4378 // Calculate by how many bytes the stack has to be adjusted in case of tail 4379 // call optimization. 4380 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4381 4382 // Adjust the stack pointer for the new arguments... 4383 // These operations are automatically eliminated by the prolog/epilog pass 4384 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4385 dl); 4386 SDValue CallSeqStart = Chain; 4387 4388 // Load the return address and frame pointer so it can be moved somewhere else 4389 // later. 4390 SDValue LROp, FPOp; 4391 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, 4392 dl); 4393 4394 // Set up a copy of the stack pointer for use loading and storing any 4395 // arguments that may not fit in the registers available for argument 4396 // passing. 4397 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4398 4399 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4400 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4401 SmallVector<SDValue, 8> MemOpChains; 4402 4403 bool seenFloatArg = false; 4404 // Walk the register/memloc assignments, inserting copies/loads. 4405 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 4406 i != e; 4407 ++i) { 4408 CCValAssign &VA = ArgLocs[i]; 4409 SDValue Arg = OutVals[i]; 4410 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4411 4412 if (Flags.isByVal()) { 4413 // Argument is an aggregate which is passed by value, thus we need to 4414 // create a copy of it in the local variable space of the current stack 4415 // frame (which is the stack frame of the caller) and pass the address of 4416 // this copy to the callee. 4417 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 4418 CCValAssign &ByValVA = ByValArgLocs[j++]; 4419 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 4420 4421 // Memory reserved in the local variable space of the callers stack frame. 4422 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 4423 4424 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 4425 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 4426 4427 // Create a copy of the argument in the local area of the current 4428 // stack frame. 4429 SDValue MemcpyCall = 4430 CreateCopyOfByValArgument(Arg, PtrOff, 4431 CallSeqStart.getNode()->getOperand(0), 4432 Flags, DAG, dl); 4433 4434 // This must go outside the CALLSEQ_START..END. 4435 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4436 CallSeqStart.getNode()->getOperand(1), 4437 SDLoc(MemcpyCall)); 4438 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4439 NewCallSeqStart.getNode()); 4440 Chain = CallSeqStart = NewCallSeqStart; 4441 4442 // Pass the address of the aggregate copy on the stack either in a 4443 // physical register or in the parameter list area of the current stack 4444 // frame to the callee. 4445 Arg = PtrOff; 4446 } 4447 4448 if (VA.isRegLoc()) { 4449 if (Arg.getValueType() == MVT::i1) 4450 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 4451 4452 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 4453 // Put argument in a physical register. 
4454 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 4455 } else { 4456 // Put argument in the parameter list area of the current stack frame. 4457 assert(VA.isMemLoc()); 4458 unsigned LocMemOffset = VA.getLocMemOffset(); 4459 4460 if (!isTailCall) { 4461 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 4462 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 4463 4464 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 4465 MachinePointerInfo(), 4466 false, false, 0)); 4467 } else { 4468 // Calculate and remember argument location. 4469 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 4470 TailCallArguments); 4471 } 4472 } 4473 } 4474 4475 if (!MemOpChains.empty()) 4476 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4477 4478 // Build a sequence of copy-to-reg nodes chained together with token chain 4479 // and flag operands which copy the outgoing args into the appropriate regs. 4480 SDValue InFlag; 4481 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4482 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4483 RegsToPass[i].second, InFlag); 4484 InFlag = Chain.getValue(1); 4485 } 4486 4487 // Set CR bit 6 to true if this is a vararg call with floating args passed in 4488 // registers. 4489 if (isVarArg) { 4490 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 4491 SDValue Ops[] = { Chain, InFlag }; 4492 4493 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 4494 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); 4495 4496 InFlag = Chain.getValue(1); 4497 } 4498 4499 if (isTailCall) 4500 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, 4501 false, TailCallArguments); 4502 4503 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, 4504 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 4505 NumBytes, Ins, InVals, CS); 4506 } 4507 4508 // Copy an argument into memory, being careful to do this outside the 4509 // call sequence for the call to which the argument belongs. 4510 SDValue 4511 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, 4512 SDValue CallSeqStart, 4513 ISD::ArgFlagsTy Flags, 4514 SelectionDAG &DAG, 4515 SDLoc dl) const { 4516 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 4517 CallSeqStart.getNode()->getOperand(0), 4518 Flags, DAG, dl); 4519 // The MEMCPY must go outside the CALLSEQ_START..END. 4520 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4521 CallSeqStart.getNode()->getOperand(1), 4522 SDLoc(MemcpyCall)); 4523 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4524 NewCallSeqStart.getNode()); 4525 return NewCallSeqStart; 4526 } 4527 4528 SDValue 4529 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, 4530 CallingConv::ID CallConv, bool isVarArg, 4531 bool isTailCall, bool IsPatchPoint, 4532 const SmallVectorImpl<ISD::OutputArg> &Outs, 4533 const SmallVectorImpl<SDValue> &OutVals, 4534 const SmallVectorImpl<ISD::InputArg> &Ins, 4535 SDLoc dl, SelectionDAG &DAG, 4536 SmallVectorImpl<SDValue> &InVals, 4537 ImmutableCallSite *CS) const { 4538 4539 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4540 bool isLittleEndian = Subtarget.isLittleEndian(); 4541 unsigned NumOps = Outs.size(); 4542 4543 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4544 unsigned PtrByteSize = 8; 4545 4546 MachineFunction &MF = DAG.getMachineFunction(); 4547 4548 // Mark this function as potentially containing a function that contains a 4549 // tail call. 
As a consequence the frame pointer will be used for dynamicalloc 4550 // and restoring the callers stack pointer in this functions epilog. This is 4551 // done because by tail calling the called function might overwrite the value 4552 // in this function's (MF) stack pointer stack slot 0(SP). 4553 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4554 CallConv == CallingConv::Fast) 4555 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4556 4557 assert(!(CallConv == CallingConv::Fast && isVarArg) && 4558 "fastcc not supported on varargs functions"); 4559 4560 // Count how many bytes are to be pushed on the stack, including the linkage 4561 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 4562 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 4563 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 4564 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 4565 unsigned NumBytes = LinkageSize; 4566 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4567 unsigned &QFPR_idx = FPR_idx; 4568 4569 static const MCPhysReg GPR[] = { 4570 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4571 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4572 }; 4573 static const MCPhysReg VR[] = { 4574 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4575 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4576 }; 4577 static const MCPhysReg VSRH[] = { 4578 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 4579 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 4580 }; 4581 4582 const unsigned NumGPRs = array_lengthof(GPR); 4583 const unsigned NumFPRs = 13; 4584 const unsigned NumVRs = array_lengthof(VR); 4585 const unsigned NumQFPRs = NumFPRs; 4586 4587 // When using the fast calling convention, we don't provide backing for 4588 // arguments that will be in registers. 4589 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; 4590 4591 // Add up all the space actually used. 4592 for (unsigned i = 0; i != NumOps; ++i) { 4593 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4594 EVT ArgVT = Outs[i].VT; 4595 EVT OrigVT = Outs[i].ArgVT; 4596 4597 if (CallConv == CallingConv::Fast) { 4598 if (Flags.isByVal()) 4599 NumGPRsUsed += (Flags.getByValSize()+7)/8; 4600 else 4601 switch (ArgVT.getSimpleVT().SimpleTy) { 4602 default: llvm_unreachable("Unexpected ValueType for argument!"); 4603 case MVT::i1: 4604 case MVT::i32: 4605 case MVT::i64: 4606 if (++NumGPRsUsed <= NumGPRs) 4607 continue; 4608 break; 4609 case MVT::v4i32: 4610 case MVT::v8i16: 4611 case MVT::v16i8: 4612 case MVT::v2f64: 4613 case MVT::v2i64: 4614 if (++NumVRsUsed <= NumVRs) 4615 continue; 4616 break; 4617 case MVT::v4f32: 4618 // When using QPX, this is handled like a FP register, otherwise, it 4619 // is an Altivec register. 4620 if (Subtarget.hasQPX()) { 4621 if (++NumFPRsUsed <= NumFPRs) 4622 continue; 4623 } else { 4624 if (++NumVRsUsed <= NumVRs) 4625 continue; 4626 } 4627 break; 4628 case MVT::f32: 4629 case MVT::f64: 4630 case MVT::v4f64: // QPX 4631 case MVT::v4i1: // QPX 4632 if (++NumFPRsUsed <= NumFPRs) 4633 continue; 4634 break; 4635 } 4636 } 4637 4638 /* Respect alignment of argument on the stack. 
*/ 4639 unsigned Align = 4640 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 4641 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 4642 4643 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 4644 if (Flags.isInConsecutiveRegsLast()) 4645 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4646 } 4647 4648 unsigned NumBytesActuallyUsed = NumBytes; 4649 4650 // The prolog code of the callee may store up to 8 GPR argument registers to 4651 // the stack, allowing va_start to index over them in memory if it is varargs. 4652 // Because we cannot tell if this is needed on the caller side, we have to 4653 // conservatively assume that it is needed. As such, make sure we have at 4654 // least enough stack space for the caller to store the 8 GPRs. 4655 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. 4656 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 4657 4658 // Tail call needs the stack to be aligned. 4659 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4660 CallConv == CallingConv::Fast) 4661 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 4662 4663 // Calculate by how many bytes the stack has to be adjusted in case of tail 4664 // call optimization. 4665 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4666 4667 // To protect arguments on the stack from being clobbered in a tail call, 4668 // force all the loads to happen before doing any other lowering. 4669 if (isTailCall) 4670 Chain = DAG.getStackArgumentTokenFactor(Chain); 4671 4672 // Adjust the stack pointer for the new arguments... 4673 // These operations are automatically eliminated by the prolog/epilog pass 4674 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4675 dl); 4676 SDValue CallSeqStart = Chain; 4677 4678 // Load the return address and frame pointer so they can be moved somewhere else 4679 // later. 4680 SDValue LROp, FPOp; 4681 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4682 dl); 4683 4684 // Set up a copy of the stack pointer for use loading and storing any 4685 // arguments that may not fit in the registers available for argument 4686 // passing. 4687 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4688 4689 // Figure out which arguments are going to go in registers, and which in 4690 // memory. Also, if this is a vararg function, floating point operations 4691 // must be stored to our stack, and loaded into integer regs as well, if 4692 // any integer regs are available for argument passing. 4693 unsigned ArgOffset = LinkageSize; 4694 4695 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4696 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4697 4698 SmallVector<SDValue, 8> MemOpChains; 4699 for (unsigned i = 0; i != NumOps; ++i) { 4700 SDValue Arg = OutVals[i]; 4701 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4702 EVT ArgVT = Outs[i].VT; 4703 EVT OrigVT = Outs[i].ArgVT; 4704 4705 // PtrOff will be used to store the current argument to the stack if a 4706 // register cannot be found for it. 4707 SDValue PtrOff; 4708 4709 // We re-align the argument offset for each argument, except when using the 4710 // fast calling convention, when we need to make sure we do that only when 4711 // we'll actually use a stack slot. 4712 auto ComputePtrOff = [&]() { 4713 /* Respect alignment of argument on the stack.
*/ 4714 unsigned Align = 4715 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 4716 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 4717 4718 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4719 4720 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4721 }; 4722 4723 if (CallConv != CallingConv::Fast) { 4724 ComputePtrOff(); 4725 4726 /* Compute GPR index associated with argument offset. */ 4727 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 4728 GPR_idx = std::min(GPR_idx, NumGPRs); 4729 } 4730 4731 // Promote integers to 64-bit values. 4732 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 4733 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4734 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4735 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4736 } 4737 4738 // FIXME memcpy is used way more than necessary. Correctness first. 4739 // Note: "by value" is code for passing a structure by value, not 4740 // basic types. 4741 if (Flags.isByVal()) { 4742 // Note: Size includes alignment padding, so 4743 // struct x { short a; char b; } 4744 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 4745 // These are the proper values we need for right-justifying the 4746 // aggregate in a parameter register. 4747 unsigned Size = Flags.getByValSize(); 4748 4749 // An empty aggregate parameter takes up no storage and no 4750 // registers. 4751 if (Size == 0) 4752 continue; 4753 4754 if (CallConv == CallingConv::Fast) 4755 ComputePtrOff(); 4756 4757 // All aggregates smaller than 8 bytes must be passed right-justified. 4758 if (Size==1 || Size==2 || Size==4) { 4759 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); 4760 if (GPR_idx != NumGPRs) { 4761 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4762 MachinePointerInfo(), VT, 4763 false, false, false, 0); 4764 MemOpChains.push_back(Load.getValue(1)); 4765 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4766 4767 ArgOffset += PtrByteSize; 4768 continue; 4769 } 4770 } 4771 4772 if (GPR_idx == NumGPRs && Size < 8) { 4773 SDValue AddPtr = PtrOff; 4774 if (!isLittleEndian) { 4775 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4776 PtrOff.getValueType()); 4777 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4778 } 4779 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4780 CallSeqStart, 4781 Flags, DAG, dl); 4782 ArgOffset += PtrByteSize; 4783 continue; 4784 } 4785 // Copy entire object into memory. There are cases where gcc-generated 4786 // code assumes it is there, even if it could be put entirely into 4787 // registers. (This is not what the doc says.) 4788 4789 // FIXME: The above statement is likely due to a misunderstanding of the 4790 // documents. All arguments must be copied into the parameter area BY 4791 // THE CALLEE in the event that the callee takes the address of any 4792 // formal argument. That has not yet been implemented. However, it is 4793 // reasonable to use the stack area as a staging area for the register 4794 // load. 4795 4796 // Skip this for small aggregates, as we will use the same slot for a 4797 // right-justified copy, below. 4798 if (Size >= 8) 4799 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4800 CallSeqStart, 4801 Flags, DAG, dl); 4802 4803 // When a register is available, pass a small aggregate right-justified. 
4804 if (Size < 8 && GPR_idx != NumGPRs) { 4805 // The easiest way to get this right-justified in a register 4806 // is to copy the structure into the rightmost portion of a 4807 // local variable slot, then load the whole slot into the 4808 // register. 4809 // FIXME: The memcpy seems to produce pretty awful code for 4810 // small aggregates, particularly for packed ones. 4811 // FIXME: It would be preferable to use the slot in the 4812 // parameter save area instead of a new local variable. 4813 SDValue AddPtr = PtrOff; 4814 if (!isLittleEndian) { 4815 SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); 4816 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4817 } 4818 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4819 CallSeqStart, 4820 Flags, DAG, dl); 4821 4822 // Load the slot into the register. 4823 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, 4824 MachinePointerInfo(), 4825 false, false, false, 0); 4826 MemOpChains.push_back(Load.getValue(1)); 4827 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4828 4829 // Done with this argument. 4830 ArgOffset += PtrByteSize; 4831 continue; 4832 } 4833 4834 // For aggregates larger than PtrByteSize, copy the pieces of the 4835 // object that fit into registers from the parameter save area. 4836 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4837 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4838 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4839 if (GPR_idx != NumGPRs) { 4840 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4841 MachinePointerInfo(), 4842 false, false, false, 0); 4843 MemOpChains.push_back(Load.getValue(1)); 4844 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4845 ArgOffset += PtrByteSize; 4846 } else { 4847 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4848 break; 4849 } 4850 } 4851 continue; 4852 } 4853 4854 switch (Arg.getSimpleValueType().SimpleTy) { 4855 default: llvm_unreachable("Unexpected ValueType for argument!"); 4856 case MVT::i1: 4857 case MVT::i32: 4858 case MVT::i64: 4859 // These can be scalar arguments or elements of an integer array type 4860 // passed directly. Clang may use those instead of "byval" aggregate 4861 // types to avoid forcing arguments to memory unnecessarily. 4862 if (GPR_idx != NumGPRs) { 4863 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4864 } else { 4865 if (CallConv == CallingConv::Fast) 4866 ComputePtrOff(); 4867 4868 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4869 true, isTailCall, false, MemOpChains, 4870 TailCallArguments, dl); 4871 if (CallConv == CallingConv::Fast) 4872 ArgOffset += PtrByteSize; 4873 } 4874 if (CallConv != CallingConv::Fast) 4875 ArgOffset += PtrByteSize; 4876 break; 4877 case MVT::f32: 4878 case MVT::f64: { 4879 // These can be scalar arguments or elements of a float array type 4880 // passed directly. The latter are used to implement ELFv2 homogenous 4881 // float aggregates. 4882 4883 // Named arguments go into FPRs first, and once they overflow, the 4884 // remaining arguments go into GPRs and then the parameter save area. 4885 // Unnamed arguments for vararg functions always go to GPRs and 4886 // then the parameter save area. For now, put all arguments to vararg 4887 // routines always in both locations (FPR *and* GPR or stack slot). 4888 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 4889 bool NeededLoad = false; 4890 4891 // First load the argument into the next available FPR. 
4892 if (FPR_idx != NumFPRs) 4893 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4894 4895 // Next, load the argument into GPR or stack slot if needed. 4896 if (!NeedGPROrStack) 4897 ; 4898 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) { 4899 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 4900 // once we support fp <-> gpr moves. 4901 4902 // In the non-vararg case, this can only ever happen in the 4903 // presence of f32 array types, since otherwise we never run 4904 // out of FPRs before running out of GPRs. 4905 SDValue ArgVal; 4906 4907 // Double values are always passed in a single GPR. 4908 if (Arg.getValueType() != MVT::f32) { 4909 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 4910 4911 // Non-array float values are extended and passed in a GPR. 4912 } else if (!Flags.isInConsecutiveRegs()) { 4913 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4914 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 4915 4916 // If we have an array of floats, we collect every odd element 4917 // together with its predecessor into one GPR. 4918 } else if (ArgOffset % PtrByteSize != 0) { 4919 SDValue Lo, Hi; 4920 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 4921 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4922 if (!isLittleEndian) 4923 std::swap(Lo, Hi); 4924 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 4925 4926 // The final element, if even, goes into the first half of a GPR. 4927 } else if (Flags.isInConsecutiveRegsLast()) { 4928 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4929 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 4930 if (!isLittleEndian) 4931 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, 4932 DAG.getConstant(32, MVT::i32)); 4933 4934 // Non-final even elements are skipped; they will be handled 4935 // together the with subsequent argument on the next go-around. 4936 } else 4937 ArgVal = SDValue(); 4938 4939 if (ArgVal.getNode()) 4940 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); 4941 } else { 4942 if (CallConv == CallingConv::Fast) 4943 ComputePtrOff(); 4944 4945 // Single-precision floating-point values are mapped to the 4946 // second (rightmost) word of the stack doubleword. 4947 if (Arg.getValueType() == MVT::f32 && 4948 !isLittleEndian && !Flags.isInConsecutiveRegs()) { 4949 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4950 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4951 } 4952 4953 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4954 true, isTailCall, false, MemOpChains, 4955 TailCallArguments, dl); 4956 4957 NeededLoad = true; 4958 } 4959 // When passing an array of floats, the array occupies consecutive 4960 // space in the argument area; only round up to the next doubleword 4961 // at the end of the array. Otherwise, each float takes 8 bytes. 4962 if (CallConv != CallingConv::Fast || NeededLoad) { 4963 ArgOffset += (Arg.getValueType() == MVT::f32 && 4964 Flags.isInConsecutiveRegs()) ? 4 : 8; 4965 if (Flags.isInConsecutiveRegsLast()) 4966 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4967 } 4968 break; 4969 } 4970 case MVT::v4f32: 4971 case MVT::v4i32: 4972 case MVT::v8i16: 4973 case MVT::v16i8: 4974 case MVT::v2f64: 4975 case MVT::v2i64: 4976 if (!Subtarget.hasQPX()) { 4977 // These can be scalar arguments or elements of a vector array type 4978 // passed directly. 
The latter are used to implement ELFv2 homogenous 4979 // vector aggregates. 4980 4981 // For a varargs call, named arguments go into VRs or on the stack as 4982 // usual; unnamed arguments always go to the stack or the corresponding 4983 // GPRs when within range. For now, we always put the value in both 4984 // locations (or even all three). 4985 if (isVarArg) { 4986 // We could elide this store in the case where the object fits 4987 // entirely in R registers. Maybe later. 4988 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4989 MachinePointerInfo(), false, false, 0); 4990 MemOpChains.push_back(Store); 4991 if (VR_idx != NumVRs) { 4992 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4993 MachinePointerInfo(), 4994 false, false, false, 0); 4995 MemOpChains.push_back(Load.getValue(1)); 4996 4997 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 4998 Arg.getSimpleValueType() == MVT::v2i64) ? 4999 VSRH[VR_idx] : VR[VR_idx]; 5000 ++VR_idx; 5001 5002 RegsToPass.push_back(std::make_pair(VReg, Load)); 5003 } 5004 ArgOffset += 16; 5005 for (unsigned i=0; i<16; i+=PtrByteSize) { 5006 if (GPR_idx == NumGPRs) 5007 break; 5008 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5009 DAG.getConstant(i, PtrVT)); 5010 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 5011 false, false, false, 0); 5012 MemOpChains.push_back(Load.getValue(1)); 5013 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5014 } 5015 break; 5016 } 5017 5018 // Non-varargs Altivec params go into VRs or on the stack. 5019 if (VR_idx != NumVRs) { 5020 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 5021 Arg.getSimpleValueType() == MVT::v2i64) ? 5022 VSRH[VR_idx] : VR[VR_idx]; 5023 ++VR_idx; 5024 5025 RegsToPass.push_back(std::make_pair(VReg, Arg)); 5026 } else { 5027 if (CallConv == CallingConv::Fast) 5028 ComputePtrOff(); 5029 5030 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5031 true, isTailCall, true, MemOpChains, 5032 TailCallArguments, dl); 5033 if (CallConv == CallingConv::Fast) 5034 ArgOffset += 16; 5035 } 5036 5037 if (CallConv != CallingConv::Fast) 5038 ArgOffset += 16; 5039 break; 5040 } // not QPX 5041 5042 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && 5043 "Invalid QPX parameter type"); 5044 5045 /* fall through */ 5046 case MVT::v4f64: 5047 case MVT::v4i1: { 5048 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; 5049 if (isVarArg) { 5050 // We could elide this store in the case where the object fits 5051 // entirely in R registers. Maybe later. 5052 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 5053 MachinePointerInfo(), false, false, 0); 5054 MemOpChains.push_back(Store); 5055 if (QFPR_idx != NumQFPRs) { 5056 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, 5057 Store, PtrOff, MachinePointerInfo(), 5058 false, false, false, 0); 5059 MemOpChains.push_back(Load.getValue(1)); 5060 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); 5061 } 5062 ArgOffset += (IsF32 ? 16 : 32); 5063 for (unsigned i = 0; i < (IsF32 ? 
16U : 32U); i += PtrByteSize) { 5064 if (GPR_idx == NumGPRs) 5065 break; 5066 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5067 DAG.getConstant(i, PtrVT)); 5068 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 5069 false, false, false, 0); 5070 MemOpChains.push_back(Load.getValue(1)); 5071 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5072 } 5073 break; 5074 } 5075 5076 // Non-varargs QPX params go into registers or on the stack. 5077 if (QFPR_idx != NumQFPRs) { 5078 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); 5079 } else { 5080 if (CallConv == CallingConv::Fast) 5081 ComputePtrOff(); 5082 5083 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5084 true, isTailCall, true, MemOpChains, 5085 TailCallArguments, dl); 5086 if (CallConv == CallingConv::Fast) 5087 ArgOffset += (IsF32 ? 16 : 32); 5088 } 5089 5090 if (CallConv != CallingConv::Fast) 5091 ArgOffset += (IsF32 ? 16 : 32); 5092 break; 5093 } 5094 } 5095 } 5096 5097 assert(NumBytesActuallyUsed == ArgOffset); 5098 (void)NumBytesActuallyUsed; 5099 5100 if (!MemOpChains.empty()) 5101 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5102 5103 // Check if this is an indirect call (MTCTR/BCTRL). 5104 // See PrepareCall() for more information about calls through function 5105 // pointers in the 64-bit SVR4 ABI. 5106 if (!isTailCall && !IsPatchPoint && 5107 !isFunctionGlobalAddress(Callee) && 5108 !isa<ExternalSymbolSDNode>(Callee)) { 5109 // Load r2 into a virtual register and store it to the TOC save area. 5110 setUsesTOCBasePtr(DAG); 5111 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 5112 // TOC save area offset. 5113 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 5114 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); 5115 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5116 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, 5117 MachinePointerInfo::getStack(TOCSaveOffset), 5118 false, false, 0); 5119 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 5120 // This does not mean the MTCTR instruction must use R12; it's easier 5121 // to model this as an extra parameter, so do that. 5122 if (isELFv2ABI && !IsPatchPoint) 5123 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 5124 } 5125 5126 // Build a sequence of copy-to-reg nodes chained together with token chain 5127 // and flag operands which copy the outgoing args into the appropriate regs. 
5128 SDValue InFlag; 5129 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5130 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5131 RegsToPass[i].second, InFlag); 5132 InFlag = Chain.getValue(1); 5133 } 5134 5135 if (isTailCall) 5136 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, 5137 FPOp, true, TailCallArguments); 5138 5139 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, 5140 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 5141 NumBytes, Ins, InVals, CS); 5142 } 5143 5144 SDValue 5145 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, 5146 CallingConv::ID CallConv, bool isVarArg, 5147 bool isTailCall, bool IsPatchPoint, 5148 const SmallVectorImpl<ISD::OutputArg> &Outs, 5149 const SmallVectorImpl<SDValue> &OutVals, 5150 const SmallVectorImpl<ISD::InputArg> &Ins, 5151 SDLoc dl, SelectionDAG &DAG, 5152 SmallVectorImpl<SDValue> &InVals, 5153 ImmutableCallSite *CS) const { 5154 5155 unsigned NumOps = Outs.size(); 5156 5157 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5158 bool isPPC64 = PtrVT == MVT::i64; 5159 unsigned PtrByteSize = isPPC64 ? 8 : 4; 5160 5161 MachineFunction &MF = DAG.getMachineFunction(); 5162 5163 // Mark this function as potentially containing a function that contains a 5164 // tail call. As a consequence the frame pointer will be used for dynamicalloc 5165 // and restoring the callers stack pointer in this functions epilog. This is 5166 // done because by tail calling the called function might overwrite the value 5167 // in this function's (MF) stack pointer stack slot 0(SP). 5168 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5169 CallConv == CallingConv::Fast) 5170 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5171 5172 // Count how many bytes are to be pushed on the stack, including the linkage 5173 // area, and parameter passing area. We start with 24/48 bytes, which is 5174 // prereserved space for [SP][CR][LR][3 x unused]. 5175 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 5176 unsigned NumBytes = LinkageSize; 5177 5178 // Add up all the space actually used. 5179 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 5180 // they all go in registers, but we must reserve stack space for them for 5181 // possible use by the caller. In varargs or 64-bit calls, parameters are 5182 // assigned stack space in order, with padding so Altivec parameters are 5183 // 16-byte aligned. 5184 unsigned nAltivecParamsAtEnd = 0; 5185 for (unsigned i = 0; i != NumOps; ++i) { 5186 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5187 EVT ArgVT = Outs[i].VT; 5188 // Varargs Altivec parameters are padded to a 16 byte boundary. 5189 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 5190 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 5191 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 5192 if (!isVarArg && !isPPC64) { 5193 // Non-varargs Altivec parameters go after all the non-Altivec 5194 // parameters; handle those later so we know how much padding we need. 5195 nAltivecParamsAtEnd++; 5196 continue; 5197 } 5198 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 5199 NumBytes = ((NumBytes+15)/16)*16; 5200 } 5201 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5202 } 5203 5204 // Allow for Altivec parameters at the end, if needed. 
5205 if (nAltivecParamsAtEnd) { 5206 NumBytes = ((NumBytes+15)/16)*16; 5207 NumBytes += 16*nAltivecParamsAtEnd; 5208 } 5209 5210 // The prolog code of the callee may store up to 8 GPR argument registers to 5211 // the stack, allowing va_start to index over them in memory if its varargs. 5212 // Because we cannot tell if this is needed on the caller side, we have to 5213 // conservatively assume that it is needed. As such, make sure we have at 5214 // least enough stack space for the caller to store the 8 GPRs. 5215 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5216 5217 // Tail call needs the stack to be aligned. 5218 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5219 CallConv == CallingConv::Fast) 5220 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5221 5222 // Calculate by how many bytes the stack has to be adjusted in case of tail 5223 // call optimization. 5224 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5225 5226 // To protect arguments on the stack from being clobbered in a tail call, 5227 // force all the loads to happen before doing any other lowering. 5228 if (isTailCall) 5229 Chain = DAG.getStackArgumentTokenFactor(Chain); 5230 5231 // Adjust the stack pointer for the new arguments... 5232 // These operations are automatically eliminated by the prolog/epilog pass 5233 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 5234 dl); 5235 SDValue CallSeqStart = Chain; 5236 5237 // Load the return address and frame pointer so it can be move somewhere else 5238 // later. 5239 SDValue LROp, FPOp; 5240 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 5241 dl); 5242 5243 // Set up a copy of the stack pointer for use loading and storing any 5244 // arguments that may not fit in the registers available for argument 5245 // passing. 5246 SDValue StackPtr; 5247 if (isPPC64) 5248 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5249 else 5250 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 5251 5252 // Figure out which arguments are going to go in registers, and which in 5253 // memory. Also, if this is a vararg function, floating point operations 5254 // must be stored to our stack, and loaded into integer regs as well, if 5255 // any integer regs are available for argument passing. 5256 unsigned ArgOffset = LinkageSize; 5257 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5258 5259 static const MCPhysReg GPR_32[] = { // 32-bit registers. 5260 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 5261 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 5262 }; 5263 static const MCPhysReg GPR_64[] = { // 64-bit registers. 5264 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5265 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5266 }; 5267 static const MCPhysReg VR[] = { 5268 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5269 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5270 }; 5271 const unsigned NumGPRs = array_lengthof(GPR_32); 5272 const unsigned NumFPRs = 13; 5273 const unsigned NumVRs = array_lengthof(VR); 5274 5275 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 5276 5277 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5278 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5279 5280 SmallVector<SDValue, 8> MemOpChains; 5281 for (unsigned i = 0; i != NumOps; ++i) { 5282 SDValue Arg = OutVals[i]; 5283 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5284 5285 // PtrOff will be used to store the current argument to the stack if a 5286 // register cannot be found for it. 
5287 SDValue PtrOff; 5288 5289 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 5290 5291 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5292 5293 // On PPC64, promote integers to 64-bit values. 5294 if (isPPC64 && Arg.getValueType() == MVT::i32) { 5295 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5296 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5297 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5298 } 5299 5300 // FIXME memcpy is used way more than necessary. Correctness first. 5301 // Note: "by value" is code for passing a structure by value, not 5302 // basic types. 5303 if (Flags.isByVal()) { 5304 unsigned Size = Flags.getByValSize(); 5305 // Very small objects are passed right-justified. Everything else is 5306 // passed left-justified. 5307 if (Size==1 || Size==2) { 5308 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 5309 if (GPR_idx != NumGPRs) { 5310 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5311 MachinePointerInfo(), VT, 5312 false, false, false, 0); 5313 MemOpChains.push_back(Load.getValue(1)); 5314 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5315 5316 ArgOffset += PtrByteSize; 5317 } else { 5318 SDValue Const = DAG.getConstant(PtrByteSize - Size, 5319 PtrOff.getValueType()); 5320 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5321 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5322 CallSeqStart, 5323 Flags, DAG, dl); 5324 ArgOffset += PtrByteSize; 5325 } 5326 continue; 5327 } 5328 // Copy entire object into memory. There are cases where gcc-generated 5329 // code assumes it is there, even if it could be put entirely into 5330 // registers. (This is not what the doc says.) 5331 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5332 CallSeqStart, 5333 Flags, DAG, dl); 5334 5335 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 5336 // copy the pieces of the object that fit into registers from the 5337 // parameter save area. 
5338 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5339 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 5340 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5341 if (GPR_idx != NumGPRs) { 5342 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 5343 MachinePointerInfo(), 5344 false, false, false, 0); 5345 MemOpChains.push_back(Load.getValue(1)); 5346 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5347 ArgOffset += PtrByteSize; 5348 } else { 5349 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5350 break; 5351 } 5352 } 5353 continue; 5354 } 5355 5356 switch (Arg.getSimpleValueType().SimpleTy) { 5357 default: llvm_unreachable("Unexpected ValueType for argument!"); 5358 case MVT::i1: 5359 case MVT::i32: 5360 case MVT::i64: 5361 if (GPR_idx != NumGPRs) { 5362 if (Arg.getValueType() == MVT::i1) 5363 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 5364 5365 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5366 } else { 5367 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5368 isPPC64, isTailCall, false, MemOpChains, 5369 TailCallArguments, dl); 5370 } 5371 ArgOffset += PtrByteSize; 5372 break; 5373 case MVT::f32: 5374 case MVT::f64: 5375 if (FPR_idx != NumFPRs) { 5376 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5377 5378 if (isVarArg) { 5379 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 5380 MachinePointerInfo(), false, false, 0); 5381 MemOpChains.push_back(Store); 5382 5383 // Float varargs are always shadowed in available integer registers 5384 if (GPR_idx != NumGPRs) { 5385 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 5386 MachinePointerInfo(), false, false, 5387 false, 0); 5388 MemOpChains.push_back(Load.getValue(1)); 5389 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5390 } 5391 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 5392 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 5393 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5394 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 5395 MachinePointerInfo(), 5396 false, false, false, 0); 5397 MemOpChains.push_back(Load.getValue(1)); 5398 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5399 } 5400 } else { 5401 // If we have any FPRs remaining, we may also have GPRs remaining. 5402 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 5403 // GPRs. 5404 if (GPR_idx != NumGPRs) 5405 ++GPR_idx; 5406 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 5407 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 5408 ++GPR_idx; 5409 } 5410 } else 5411 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5412 isPPC64, isTailCall, false, MemOpChains, 5413 TailCallArguments, dl); 5414 if (isPPC64) 5415 ArgOffset += 8; 5416 else 5417 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 5418 break; 5419 case MVT::v4f32: 5420 case MVT::v4i32: 5421 case MVT::v8i16: 5422 case MVT::v16i8: 5423 if (isVarArg) { 5424 // These go aligned on the stack, or in the corresponding R registers 5425 // when within range. The Darwin PPC ABI doc claims they also go in 5426 // V registers; in fact gcc does this only for arguments that are 5427 // prototyped, not for those that match the ... We do it for all 5428 // arguments, seems to work. 
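// Illustrative example (numbers are hypothetical, not from the ABI document):
// on a 32-bit Darwin target the linkage area is 24 bytes and PtrByteSize is 4,
// so if ArgOffset has reached, say, 36 when a v4f32 vararg is seen, the loop
// below advances it 36 -> 40 -> 44 -> 48 to reach a 16-byte boundary,
// consuming one shadow GPR per skipped 4-byte slot (while any remain) so the
// GPR index stays in step with the stack offset.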
5429 while (ArgOffset % 16 !=0) { 5430 ArgOffset += PtrByteSize; 5431 if (GPR_idx != NumGPRs) 5432 GPR_idx++; 5433 } 5434 // We could elide this store in the case where the object fits 5435 // entirely in R registers. Maybe later. 5436 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 5437 DAG.getConstant(ArgOffset, PtrVT)); 5438 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 5439 MachinePointerInfo(), false, false, 0); 5440 MemOpChains.push_back(Store); 5441 if (VR_idx != NumVRs) { 5442 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 5443 MachinePointerInfo(), 5444 false, false, false, 0); 5445 MemOpChains.push_back(Load.getValue(1)); 5446 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5447 } 5448 ArgOffset += 16; 5449 for (unsigned i=0; i<16; i+=PtrByteSize) { 5450 if (GPR_idx == NumGPRs) 5451 break; 5452 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5453 DAG.getConstant(i, PtrVT)); 5454 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 5455 false, false, false, 0); 5456 MemOpChains.push_back(Load.getValue(1)); 5457 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5458 } 5459 break; 5460 } 5461 5462 // Non-varargs Altivec params generally go in registers, but have 5463 // stack space allocated at the end. 5464 if (VR_idx != NumVRs) { 5465 // Doesn't have GPR space allocated. 5466 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5467 } else if (nAltivecParamsAtEnd==0) { 5468 // We are emitting Altivec params in order. 5469 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5470 isPPC64, isTailCall, true, MemOpChains, 5471 TailCallArguments, dl); 5472 ArgOffset += 16; 5473 } 5474 break; 5475 } 5476 } 5477 // If all Altivec parameters fit in registers, as they usually do, 5478 // they get stack space following the non-Altivec parameters. We 5479 // don't track this here because nobody below needs it. 5480 // If there are more Altivec parameters than fit in registers emit 5481 // the stores here. 5482 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 5483 unsigned j = 0; 5484 // Offset is aligned; skip 1st 12 params which go in V registers. 5485 ArgOffset = ((ArgOffset+15)/16)*16; 5486 ArgOffset += 12*16; 5487 for (unsigned i = 0; i != NumOps; ++i) { 5488 SDValue Arg = OutVals[i]; 5489 EVT ArgType = Outs[i].VT; 5490 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 5491 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 5492 if (++j > NumVRs) { 5493 SDValue PtrOff; 5494 // We are emitting Altivec params in order. 5495 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5496 isPPC64, isTailCall, true, MemOpChains, 5497 TailCallArguments, dl); 5498 ArgOffset += 16; 5499 } 5500 } 5501 } 5502 } 5503 5504 if (!MemOpChains.empty()) 5505 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5506 5507 // On Darwin, R12 must contain the address of an indirect callee. This does 5508 // not mean the MTCTR instruction must use R12; it's easier to model this as 5509 // an extra parameter, so do that. 5510 if (!isTailCall && 5511 !isFunctionGlobalAddress(Callee) && 5512 !isa<ExternalSymbolSDNode>(Callee) && 5513 !isBLACompatibleAddress(Callee, DAG)) 5514 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 5515 PPC::R12), Callee)); 5516 5517 // Build a sequence of copy-to-reg nodes chained together with token chain 5518 // and flag operands which copy the outgoing args into the appropriate regs. 
5519 SDValue InFlag; 5520 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5521 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5522 RegsToPass[i].second, InFlag); 5523 InFlag = Chain.getValue(1); 5524 } 5525 5526 if (isTailCall) 5527 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 5528 FPOp, true, TailCallArguments); 5529 5530 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, 5531 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 5532 NumBytes, Ins, InVals, CS); 5533 } 5534 5535 bool 5536 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 5537 MachineFunction &MF, bool isVarArg, 5538 const SmallVectorImpl<ISD::OutputArg> &Outs, 5539 LLVMContext &Context) const { 5540 SmallVector<CCValAssign, 16> RVLocs; 5541 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 5542 return CCInfo.CheckReturn(Outs, RetCC_PPC); 5543 } 5544 5545 SDValue 5546 PPCTargetLowering::LowerReturn(SDValue Chain, 5547 CallingConv::ID CallConv, bool isVarArg, 5548 const SmallVectorImpl<ISD::OutputArg> &Outs, 5549 const SmallVectorImpl<SDValue> &OutVals, 5550 SDLoc dl, SelectionDAG &DAG) const { 5551 5552 SmallVector<CCValAssign, 16> RVLocs; 5553 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 5554 *DAG.getContext()); 5555 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 5556 5557 SDValue Flag; 5558 SmallVector<SDValue, 4> RetOps(1, Chain); 5559 5560 // Copy the result values into the output registers. 5561 for (unsigned i = 0; i != RVLocs.size(); ++i) { 5562 CCValAssign &VA = RVLocs[i]; 5563 assert(VA.isRegLoc() && "Can only return in registers!"); 5564 5565 SDValue Arg = OutVals[i]; 5566 5567 switch (VA.getLocInfo()) { 5568 default: llvm_unreachable("Unknown loc info!"); 5569 case CCValAssign::Full: break; 5570 case CCValAssign::AExt: 5571 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 5572 break; 5573 case CCValAssign::ZExt: 5574 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 5575 break; 5576 case CCValAssign::SExt: 5577 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 5578 break; 5579 } 5580 5581 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 5582 Flag = Chain.getValue(1); 5583 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 5584 } 5585 5586 RetOps[0] = Chain; // Update chain. 5587 5588 // Add the flag if we have it. 5589 if (Flag.getNode()) 5590 RetOps.push_back(Flag); 5591 5592 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); 5593 } 5594 5595 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, 5596 const PPCSubtarget &Subtarget) const { 5597 // When we pop the dynamic allocation we need to restore the SP link. 5598 SDLoc dl(Op); 5599 5600 // Get the corect type for pointers. 5601 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5602 5603 // Construct the stack pointer operand. 5604 bool isPPC64 = Subtarget.isPPC64(); 5605 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 5606 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 5607 5608 // Get the operands for the STACKRESTORE. 5609 SDValue Chain = Op.getOperand(0); 5610 SDValue SaveSP = Op.getOperand(1); 5611 5612 // Load the old link SP. 5613 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, 5614 MachinePointerInfo(), 5615 false, false, false, 0); 5616 5617 // Restore the stack pointer. 5618 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 5619 5620 // Store the old link SP. 
5621 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(), 5622 false, false, 0); 5623 } 5624 5625 5626 5627 SDValue 5628 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { 5629 MachineFunction &MF = DAG.getMachineFunction(); 5630 bool isPPC64 = Subtarget.isPPC64(); 5631 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5632 5633 // Get the current return address save index; this refers to the slot in 5634 // which the link register is saved, at its ABI-defined offset. 5635 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 5636 int RASI = FI->getReturnAddrSaveIndex(); 5637 5638 // If the return address save index hasn't been defined yet. 5639 if (!RASI) { 5640 // Find out the fixed offset of the return address save area. 5641 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset(); 5642 // Allocate the frame index for the return address save area. 5643 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false); 5644 // Save the result. 5645 FI->setReturnAddrSaveIndex(RASI); 5646 } 5647 return DAG.getFrameIndex(RASI, PtrVT); 5648 } 5649 5650 SDValue 5651 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 5652 MachineFunction &MF = DAG.getMachineFunction(); 5653 bool isPPC64 = Subtarget.isPPC64(); 5654 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5655 5656 // Get the current frame pointer save index. The users of this index will be 5657 // primarily DYNALLOC instructions. 5658 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 5659 int FPSI = FI->getFramePointerSaveIndex(); 5660 5661 // If the frame pointer save index hasn't been defined yet. 5662 if (!FPSI) { 5663 // Find out the fixed offset of the frame pointer save area. 5664 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset(); 5665 // Allocate the frame index for the frame pointer save area. 5666 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 5667 // Save the result. 5668 FI->setFramePointerSaveIndex(FPSI); 5669 } 5670 return DAG.getFrameIndex(FPSI, PtrVT); 5671 } 5672 5673 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 5674 SelectionDAG &DAG, 5675 const PPCSubtarget &Subtarget) const { 5676 // Get the inputs. 5677 SDValue Chain = Op.getOperand(0); 5678 SDValue Size = Op.getOperand(1); 5679 SDLoc dl(Op); 5680 5681 // Get the correct type for pointers. 5682 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5683 // Negate the size. 5684 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 5685 DAG.getConstant(0, PtrVT), Size); 5686 // Construct a node for the frame pointer save index. 5687 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 5688 // Build a DYNALLOC node.
5689 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 5690 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 5691 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); 5692 } 5693 5694 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 5695 SelectionDAG &DAG) const { 5696 SDLoc DL(Op); 5697 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 5698 DAG.getVTList(MVT::i32, MVT::Other), 5699 Op.getOperand(0), Op.getOperand(1)); 5700 } 5701 5702 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 5703 SelectionDAG &DAG) const { 5704 SDLoc DL(Op); 5705 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 5706 Op.getOperand(0), Op.getOperand(1)); 5707 } 5708 5709 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 5710 if (Op.getValueType().isVector()) 5711 return LowerVectorLoad(Op, DAG); 5712 5713 assert(Op.getValueType() == MVT::i1 && 5714 "Custom lowering only for i1 loads"); 5715 5716 // First, load 8 bits into 32 bits, then truncate to 1 bit. 5717 5718 SDLoc dl(Op); 5719 LoadSDNode *LD = cast<LoadSDNode>(Op); 5720 5721 SDValue Chain = LD->getChain(); 5722 SDValue BasePtr = LD->getBasePtr(); 5723 MachineMemOperand *MMO = LD->getMemOperand(); 5724 5725 SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, 5726 BasePtr, MVT::i8, MMO); 5727 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 5728 5729 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 5730 return DAG.getMergeValues(Ops, dl); 5731 } 5732 5733 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 5734 if (Op.getOperand(1).getValueType().isVector()) 5735 return LowerVectorStore(Op, DAG); 5736 5737 assert(Op.getOperand(1).getValueType() == MVT::i1 && 5738 "Custom lowering only for i1 stores"); 5739 5740 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 5741 5742 SDLoc dl(Op); 5743 StoreSDNode *ST = cast<StoreSDNode>(Op); 5744 5745 SDValue Chain = ST->getChain(); 5746 SDValue BasePtr = ST->getBasePtr(); 5747 SDValue Value = ST->getValue(); 5748 MachineMemOperand *MMO = ST->getMemOperand(); 5749 5750 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); 5751 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 5752 } 5753 5754 // FIXME: Remove this once the ANDI glue bug is fixed: 5755 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 5756 assert(Op.getValueType() == MVT::i1 && 5757 "Custom lowering only for i1 results"); 5758 5759 SDLoc DL(Op); 5760 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 5761 Op.getOperand(0)); 5762 } 5763 5764 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 5765 /// possible. 5766 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 5767 // Not FP? Not a fsel. 5768 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 5769 !Op.getOperand(2).getValueType().isFloatingPoint()) 5770 return Op; 5771 5772 // We might be able to do better than this under some circumstances, but in 5773 // general, fsel-based lowering of select is a finite-math-only optimization. 5774 // For more information, see section F.3 of the 2.06 ISA specification. 
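// Worked example of why this is a finite-math-only optimization (illustrative,
// not taken from the ISA text): lowering (a >= b) ? t : f as fsel(a - b, t, f)
// goes wrong when a = b = +infinity, since a - b is then NaN and fsel treats a
// NaN selector as not being >= 0, yielding f even though the original
// comparison is true. NaN operands cause similar mismatches for the unordered
// predicates, hence the no-infs/no-NaNs guard below.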
5775 if (!DAG.getTarget().Options.NoInfsFPMath || 5776 !DAG.getTarget().Options.NoNaNsFPMath) 5777 return Op; 5778 5779 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 5780 5781 EVT ResVT = Op.getValueType(); 5782 EVT CmpVT = Op.getOperand(0).getValueType(); 5783 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5784 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 5785 SDLoc dl(Op); 5786 5787 // If the RHS of the comparison is a 0.0, we don't need to do the 5788 // subtraction at all. 5789 SDValue Sel1; 5790 if (isFloatingPointZero(RHS)) 5791 switch (CC) { 5792 default: break; // SETUO etc aren't handled by fsel. 5793 case ISD::SETNE: 5794 std::swap(TV, FV); 5795 case ISD::SETEQ: 5796 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 5797 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 5798 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 5799 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 5800 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 5801 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5802 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 5803 case ISD::SETULT: 5804 case ISD::SETLT: 5805 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 5806 case ISD::SETOGE: 5807 case ISD::SETGE: 5808 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 5809 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 5810 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 5811 case ISD::SETUGT: 5812 case ISD::SETGT: 5813 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 5814 case ISD::SETOLE: 5815 case ISD::SETLE: 5816 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 5817 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 5818 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5819 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 5820 } 5821 5822 SDValue Cmp; 5823 switch (CC) { 5824 default: break; // SETUO etc aren't handled by fsel. 
5825 case ISD::SETNE: 5826 std::swap(TV, FV); 5827 case ISD::SETEQ: 5828 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5829 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5830 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5831 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5832 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 5833 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 5834 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5835 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 5836 case ISD::SETULT: 5837 case ISD::SETLT: 5838 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5839 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5840 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5841 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 5842 case ISD::SETOGE: 5843 case ISD::SETGE: 5844 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5845 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5846 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5847 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5848 case ISD::SETUGT: 5849 case ISD::SETGT: 5850 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 5851 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5852 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5853 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 5854 case ISD::SETOLE: 5855 case ISD::SETLE: 5856 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 5857 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5858 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5859 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5860 } 5861 return Op; 5862 } 5863 5864 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 5865 SelectionDAG &DAG, 5866 SDLoc dl) const { 5867 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 5868 SDValue Src = Op.getOperand(0); 5869 if (Src.getValueType() == MVT::f32) 5870 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 5871 5872 SDValue Tmp; 5873 switch (Op.getSimpleValueType().SimpleTy) { 5874 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 5875 case MVT::i32: 5876 Tmp = DAG.getNode( 5877 Op.getOpcode() == ISD::FP_TO_SINT 5878 ? PPCISD::FCTIWZ 5879 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 5880 dl, MVT::f64, Src); 5881 break; 5882 case MVT::i64: 5883 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 5884 "i64 FP_TO_UINT is supported only with FPCVT"); 5885 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 5886 PPCISD::FCTIDUZ, 5887 dl, MVT::f64, Src); 5888 break; 5889 } 5890 5891 // Convert the FP value to an int value through memory. 5892 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 5893 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 5894 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 5895 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 5896 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); 5897 5898 // Emit a store to the stack slot. 
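  // Sketch of the two paths below: with STFIWX the 32-bit integer sitting in
  // the FPR is stored directly as a 4-byte word; otherwise the whole 8-byte
  // f64 image is stored and, for an i32 result, the integer word is re-loaded
  // from offset 4 (its position in the big-endian f64 image); that is the
  // bias added further down.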
5899 SDValue Chain; 5900 if (i32Stack) { 5901 MachineFunction &MF = DAG.getMachineFunction(); 5902 MachineMemOperand *MMO = 5903 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 5904 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 5905 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 5906 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 5907 } else 5908 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 5909 MPI, false, false, 0); 5910 5911 // Result is a load from the stack slot. If loading 4 bytes, make sure to 5912 // add in a bias. 5913 if (Op.getValueType() == MVT::i32 && !i32Stack) { 5914 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 5915 DAG.getConstant(4, FIPtr.getValueType())); 5916 MPI = MPI.getWithOffset(4); 5917 } 5918 5919 RLI.Chain = Chain; 5920 RLI.Ptr = FIPtr; 5921 RLI.MPI = MPI; 5922 } 5923 5924 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 5925 SDLoc dl) const { 5926 ReuseLoadInfo RLI; 5927 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 5928 5929 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, 5930 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, 5931 RLI.Ranges); 5932 } 5933 5934 // We're trying to insert a regular store, S, and then a load, L. If the 5935 // incoming value, O, is a load, we might just be able to have our load use the 5936 // address used by O. However, we don't know if anything else will store to 5937 // that address before we can load from it. To prevent this situation, we need 5938 // to insert our load, L, into the chain as a peer of O. To do this, we give L 5939 // the same chain operand as O, we create a token factor from the chain results 5940 // of O and L, and we replace all uses of O's chain result with that token 5941 // factor (see spliceIntoChain below for this last part). 5942 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 5943 ReuseLoadInfo &RLI, 5944 SelectionDAG &DAG, 5945 ISD::LoadExtType ET) const { 5946 SDLoc dl(Op); 5947 if (ET == ISD::NON_EXTLOAD && 5948 (Op.getOpcode() == ISD::FP_TO_UINT || 5949 Op.getOpcode() == ISD::FP_TO_SINT) && 5950 isOperationLegalOrCustom(Op.getOpcode(), 5951 Op.getOperand(0).getValueType())) { 5952 5953 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 5954 return true; 5955 } 5956 5957 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 5958 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 5959 LD->isNonTemporal()) 5960 return false; 5961 if (LD->getMemoryVT() != MemVT) 5962 return false; 5963 5964 RLI.Ptr = LD->getBasePtr(); 5965 if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) { 5966 assert(LD->getAddressingMode() == ISD::PRE_INC && 5967 "Non-pre-inc AM on PPC?"); 5968 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 5969 LD->getOffset()); 5970 } 5971 5972 RLI.Chain = LD->getChain(); 5973 RLI.MPI = LD->getPointerInfo(); 5974 RLI.IsInvariant = LD->isInvariant(); 5975 RLI.Alignment = LD->getAlignment(); 5976 RLI.AAInfo = LD->getAAInfo(); 5977 RLI.Ranges = LD->getRanges(); 5978 5979 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 5980 return true; 5981 } 5982 5983 // Given the head of the old chain, ResChain, insert a token factor containing 5984 // it and NewResChain, and make users of ResChain now be users of that token 5985 // factor. 
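// Illustration: before the splice, other nodes use ResChain directly; after
// it, they use TokenFactor(ResChain, NewResChain) instead, so they are ordered
// after both memory operations. The UNDEF placeholder used below keeps the new
// TokenFactor itself out of the ReplaceAllUsesOfValueWith update; the real
// ResChain operand is patched in afterwards with UpdateNodeOperands.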
5986 void PPCTargetLowering::spliceIntoChain(SDValue ResChain, 5987 SDValue NewResChain, 5988 SelectionDAG &DAG) const { 5989 if (!ResChain) 5990 return; 5991 5992 SDLoc dl(NewResChain); 5993 5994 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 5995 NewResChain, DAG.getUNDEF(MVT::Other)); 5996 assert(TF.getNode() != NewResChain.getNode() && 5997 "A new TF really is required here"); 5998 5999 DAG.ReplaceAllUsesOfValueWith(ResChain, TF); 6000 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); 6001 } 6002 6003 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 6004 SelectionDAG &DAG) const { 6005 SDLoc dl(Op); 6006 6007 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { 6008 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) 6009 return SDValue(); 6010 6011 SDValue Value = Op.getOperand(0); 6012 // The values are now known to be -1 (false) or 1 (true). To convert this 6013 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 6014 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 6015 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 6016 6017 SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64); 6018 FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, 6019 FPHalfs, FPHalfs, FPHalfs, FPHalfs); 6020 6021 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 6022 6023 if (Op.getValueType() != MVT::v4f64) 6024 Value = DAG.getNode(ISD::FP_ROUND, dl, 6025 Op.getValueType(), Value, DAG.getIntPtrConstant(1)); 6026 return Value; 6027 } 6028 6029 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 6030 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 6031 return SDValue(); 6032 6033 if (Op.getOperand(0).getValueType() == MVT::i1) 6034 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 6035 DAG.getConstantFP(1.0, Op.getValueType()), 6036 DAG.getConstantFP(0.0, Op.getValueType())); 6037 6038 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 6039 "UINT_TO_FP is supported only with FPCVT"); 6040 6041 // If we have FCFIDS, then use it when converting to single-precision. 6042 // Otherwise, convert to double-precision and then round. 6043 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6044 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 6045 : PPCISD::FCFIDS) 6046 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 6047 : PPCISD::FCFID); 6048 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6049 ? MVT::f32 6050 : MVT::f64; 6051 6052 if (Op.getOperand(0).getValueType() == MVT::i64) { 6053 SDValue SINT = Op.getOperand(0); 6054 // When converting to single-precision, we actually need to convert 6055 // to double-precision first and then round to single-precision. 6056 // To avoid double-rounding effects during that operation, we have 6057 // to prepare the input operand. Bits that might be truncated when 6058 // converting to double-precision are replaced by a bit that won't 6059 // be lost at this stage, but is below the single-precision rounding 6060 // position. 6061 // 6062 // However, if -enable-unsafe-fp-math is in effect, accept double 6063 // rounding to avoid the extra overhead. 6064 if (Op.getValueType() == MVT::f32 && 6065 !Subtarget.hasFPCVT() && 6066 !DAG.getTarget().Options.UnsafeFPMath) { 6067 6068 // Twiddle input to make sure the low 11 bits are zero. 
(If this 6069 // is the case, we are guaranteed the value will fit into the 53 bit 6070 // mantissa of an IEEE double-precision value without rounding.) 6071 // If any of those low 11 bits were not zero originally, make sure 6072 // bit 12 (value 2048) is set instead, so that the final rounding 6073 // to single-precision gets the correct result. 6074 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6075 SINT, DAG.getConstant(2047, MVT::i64)); 6076 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 6077 Round, DAG.getConstant(2047, MVT::i64)); 6078 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 6079 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6080 Round, DAG.getConstant(-2048, MVT::i64)); 6081 6082 // However, we cannot use that value unconditionally: if the magnitude 6083 // of the input value is small, the bit-twiddling we did above might 6084 // end up visibly changing the output. Fortunately, in that case, we 6085 // don't need to twiddle bits since the original input will convert 6086 // exactly to double-precision floating-point already. Therefore, 6087 // construct a conditional to use the original value if the top 11 6088 // bits are all sign-bit copies, and use the rounded value computed 6089 // above otherwise. 6090 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 6091 SINT, DAG.getConstant(53, MVT::i32)); 6092 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 6093 Cond, DAG.getConstant(1, MVT::i64)); 6094 Cond = DAG.getSetCC(dl, MVT::i32, 6095 Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); 6096 6097 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 6098 } 6099 6100 ReuseLoadInfo RLI; 6101 SDValue Bits; 6102 6103 MachineFunction &MF = DAG.getMachineFunction(); 6104 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 6105 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, 6106 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, 6107 RLI.Ranges); 6108 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6109 } else if (Subtarget.hasLFIWAX() && 6110 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 6111 MachineMemOperand *MMO = 6112 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6113 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6114 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6115 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 6116 DAG.getVTList(MVT::f64, MVT::Other), 6117 Ops, MVT::i32, MMO); 6118 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6119 } else if (Subtarget.hasFPCVT() && 6120 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 6121 MachineMemOperand *MMO = 6122 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6123 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6124 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6125 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 6126 DAG.getVTList(MVT::f64, MVT::Other), 6127 Ops, MVT::i32, MMO); 6128 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6129 } else if (((Subtarget.hasLFIWAX() && 6130 SINT.getOpcode() == ISD::SIGN_EXTEND) || 6131 (Subtarget.hasFPCVT() && 6132 SINT.getOpcode() == ISD::ZERO_EXTEND)) && 6133 SINT.getOperand(0).getValueType() == MVT::i32) { 6134 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 6135 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 6136 6137 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 6138 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6139 6140 SDValue Store = 6141 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 6142 MachinePointerInfo::getFixedStack(FrameIdx), 6143 
false, false, 0); 6144 6145 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6146 "Expected an i32 store"); 6147 6148 RLI.Ptr = FIdx; 6149 RLI.Chain = Store; 6150 RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); 6151 RLI.Alignment = 4; 6152 6153 MachineMemOperand *MMO = 6154 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6155 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6156 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6157 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 6158 PPCISD::LFIWZX : PPCISD::LFIWAX, 6159 dl, DAG.getVTList(MVT::f64, MVT::Other), 6160 Ops, MVT::i32, MMO); 6161 } else 6162 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 6163 6164 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 6165 6166 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6167 FP = DAG.getNode(ISD::FP_ROUND, dl, 6168 MVT::f32, FP, DAG.getIntPtrConstant(0)); 6169 return FP; 6170 } 6171 6172 assert(Op.getOperand(0).getValueType() == MVT::i32 && 6173 "Unhandled INT_TO_FP type in custom expander!"); 6174 // Since we only generate this in 64-bit mode, we can take advantage of 6175 // 64-bit registers. In particular, sign extend the input value into the 6176 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 6177 // then lfd it and fcfid it. 6178 MachineFunction &MF = DAG.getMachineFunction(); 6179 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 6180 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 6181 6182 SDValue Ld; 6183 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 6184 ReuseLoadInfo RLI; 6185 bool ReusingLoad; 6186 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, 6187 DAG))) { 6188 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 6189 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6190 6191 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 6192 MachinePointerInfo::getFixedStack(FrameIdx), 6193 false, false, 0); 6194 6195 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6196 "Expected an i32 store"); 6197 6198 RLI.Ptr = FIdx; 6199 RLI.Chain = Store; 6200 RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); 6201 RLI.Alignment = 4; 6202 } 6203 6204 MachineMemOperand *MMO = 6205 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6206 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6207 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6208 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 6209 PPCISD::LFIWZX : PPCISD::LFIWAX, 6210 dl, DAG.getVTList(MVT::f64, MVT::Other), 6211 Ops, MVT::i32, MMO); 6212 if (ReusingLoad) 6213 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); 6214 } else { 6215 assert(Subtarget.isPPC64() && 6216 "i32->FP without LFIWAX supported only on PPC64"); 6217 6218 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 6219 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6220 6221 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 6222 Op.getOperand(0)); 6223 6224 // STD the extended value into the stack slot. 6225 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, 6226 MachinePointerInfo::getFixedStack(FrameIdx), 6227 false, false, 0); 6228 6229 // Load the value as a double. 6230 Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, 6231 MachinePointerInfo::getFixedStack(FrameIdx), 6232 false, false, false, 0); 6233 } 6234 6235 // FCFID it and return it. 
6236 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 6237 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6238 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); 6239 return FP; 6240 } 6241 6242 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 6243 SelectionDAG &DAG) const { 6244 SDLoc dl(Op); 6245 /* 6246 The rounding mode is in bits 30:31 of FPSR, and has the following 6247 settings: 6248 00 Round to nearest 6249 01 Round to 0 6250 10 Round to +inf 6251 11 Round to -inf 6252 6253 FLT_ROUNDS, on the other hand, expects the following: 6254 -1 Undefined 6255 0 Round to 0 6256 1 Round to nearest 6257 2 Round to +inf 6258 3 Round to -inf 6259 6260 To perform the conversion, we do: 6261 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 6262 */ 6263 6264 MachineFunction &MF = DAG.getMachineFunction(); 6265 EVT VT = Op.getValueType(); 6266 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 6267 6268 // Save FP Control Word to register 6269 EVT NodeTys[] = { 6270 MVT::f64, // return register 6271 MVT::Glue // unused in this context 6272 }; 6273 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); 6274 6275 // Save FP register to stack slot 6276 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); 6277 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 6278 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, 6279 StackSlot, MachinePointerInfo(), false, false,0); 6280 6281 // Load FP Control Word from low 32 bits of stack slot. 6282 SDValue Four = DAG.getConstant(4, PtrVT); 6283 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 6284 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), 6285 false, false, false, 0); 6286 6287 // Transform as necessary 6288 SDValue CWD1 = 6289 DAG.getNode(ISD::AND, dl, MVT::i32, 6290 CWD, DAG.getConstant(3, MVT::i32)); 6291 SDValue CWD2 = 6292 DAG.getNode(ISD::SRL, dl, MVT::i32, 6293 DAG.getNode(ISD::AND, dl, MVT::i32, 6294 DAG.getNode(ISD::XOR, dl, MVT::i32, 6295 CWD, DAG.getConstant(3, MVT::i32)), 6296 DAG.getConstant(3, MVT::i32)), 6297 DAG.getConstant(1, MVT::i32)); 6298 6299 SDValue RetVal = 6300 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 6301 6302 return DAG.getNode((VT.getSizeInBits() < 16 ? 6303 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 6304 } 6305 6306 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 6307 EVT VT = Op.getValueType(); 6308 unsigned BitWidth = VT.getSizeInBits(); 6309 SDLoc dl(Op); 6310 assert(Op.getNumOperands() == 3 && 6311 VT == Op.getOperand(1).getValueType() && 6312 "Unexpected SHL!"); 6313 6314 // Expand into a bunch of logical ops. Note that these ops 6315 // depend on the PPC behavior for oversized shift amounts. 
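  // Worked example (illustrative, BitWidth == 32): PPC's 32-bit shifts take a
  // 6-bit amount and yield 0 for amounts in [32, 63]. For Amt == 40,
  // Hi << Amt and Lo >> (32 - Amt) are both 0, and OutHi comes entirely from
  // Lo << (Amt - 32) == Lo << 8; for Amt == 8 the roles are reversed. ORing
  // the three terms therefore covers both ranges without a branch.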
6316 SDValue Lo = Op.getOperand(0); 6317 SDValue Hi = Op.getOperand(1); 6318 SDValue Amt = Op.getOperand(2); 6319 EVT AmtVT = Amt.getValueType(); 6320 6321 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6322 DAG.getConstant(BitWidth, AmtVT), Amt); 6323 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 6324 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 6325 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 6326 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6327 DAG.getConstant(-BitWidth, AmtVT)); 6328 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 6329 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 6330 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 6331 SDValue OutOps[] = { OutLo, OutHi }; 6332 return DAG.getMergeValues(OutOps, dl); 6333 } 6334 6335 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 6336 EVT VT = Op.getValueType(); 6337 SDLoc dl(Op); 6338 unsigned BitWidth = VT.getSizeInBits(); 6339 assert(Op.getNumOperands() == 3 && 6340 VT == Op.getOperand(1).getValueType() && 6341 "Unexpected SRL!"); 6342 6343 // Expand into a bunch of logical ops. Note that these ops 6344 // depend on the PPC behavior for oversized shift amounts. 6345 SDValue Lo = Op.getOperand(0); 6346 SDValue Hi = Op.getOperand(1); 6347 SDValue Amt = Op.getOperand(2); 6348 EVT AmtVT = Amt.getValueType(); 6349 6350 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6351 DAG.getConstant(BitWidth, AmtVT), Amt); 6352 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 6353 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 6354 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 6355 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6356 DAG.getConstant(-BitWidth, AmtVT)); 6357 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 6358 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 6359 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 6360 SDValue OutOps[] = { OutLo, OutHi }; 6361 return DAG.getMergeValues(OutOps, dl); 6362 } 6363 6364 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 6365 SDLoc dl(Op); 6366 EVT VT = Op.getValueType(); 6367 unsigned BitWidth = VT.getSizeInBits(); 6368 assert(Op.getNumOperands() == 3 && 6369 VT == Op.getOperand(1).getValueType() && 6370 "Unexpected SRA!"); 6371 6372 // Expand into a bunch of logical ops, followed by a select_cc. 6373 SDValue Lo = Op.getOperand(0); 6374 SDValue Hi = Op.getOperand(1); 6375 SDValue Amt = Op.getOperand(2); 6376 EVT AmtVT = Amt.getValueType(); 6377 6378 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6379 DAG.getConstant(BitWidth, AmtVT), Amt); 6380 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 6381 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 6382 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 6383 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6384 DAG.getConstant(-BitWidth, AmtVT)); 6385 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 6386 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 6387 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), 6388 Tmp4, Tmp6, ISD::SETLE); 6389 SDValue OutOps[] = { OutLo, OutHi }; 6390 return DAG.getMergeValues(OutOps, dl); 6391 } 6392 6393 //===----------------------------------------------------------------------===// 6394 // Vector related lowering. 6395 // 6396 6397 /// BuildSplatI - Build a canonical splati of Val with an element size of 6398 /// SplatSize. 
/// SplatSize.
Cast the result to VT. 6399 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 6400 SelectionDAG &DAG, SDLoc dl) { 6401 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 6402 6403 static const MVT VTys[] = { // canonical VT to use for each size. 6404 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 6405 }; 6406 6407 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 6408 6409 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 6410 if (Val == -1) 6411 SplatSize = 1; 6412 6413 EVT CanonicalVT = VTys[SplatSize-1]; 6414 6415 // Build a canonical splat for this value. 6416 SDValue Elt = DAG.getConstant(Val, MVT::i32); 6417 SmallVector<SDValue, 8> Ops; 6418 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 6419 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops); 6420 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); 6421 } 6422 6423 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 6424 /// specified intrinsic ID. 6425 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, 6426 SelectionDAG &DAG, SDLoc dl, 6427 EVT DestVT = MVT::Other) { 6428 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 6429 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 6430 DAG.getConstant(IID, MVT::i32), Op); 6431 } 6432 6433 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 6434 /// specified intrinsic ID. 6435 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 6436 SelectionDAG &DAG, SDLoc dl, 6437 EVT DestVT = MVT::Other) { 6438 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 6439 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 6440 DAG.getConstant(IID, MVT::i32), LHS, RHS); 6441 } 6442 6443 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 6444 /// specified intrinsic ID. 6445 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 6446 SDValue Op2, SelectionDAG &DAG, 6447 SDLoc dl, EVT DestVT = MVT::Other) { 6448 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 6449 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 6450 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 6451 } 6452 6453 6454 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 6455 /// amount. The result has the specified value type. 6456 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 6457 EVT VT, SelectionDAG &DAG, SDLoc dl) { 6458 // Force LHS/RHS to be the right type. 6459 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 6460 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 6461 6462 int Ops[16]; 6463 for (unsigned i = 0; i != 16; ++i) 6464 Ops[i] = i + Amt; 6465 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 6466 return DAG.getNode(ISD::BITCAST, dl, VT, T); 6467 } 6468 6469 // If this is a case we can't handle, return null and let the default 6470 // expansion code take care of it. If we CAN select this case, and if it 6471 // selects to a single instruction, return Op. Otherwise, if we can codegen 6472 // this case more efficiently than a constant pool load, lower it to the 6473 // sequence of ops that should be used. 
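// For example (illustrative): a v16i8 build_vector splatting the byte 0xFE has
// SplatBits == 0xFE, SplatSize == 1, and sign-extends to SextVal == -2; that
// is within [-16, 15], so the code below emits a single 'vspltisb -2'.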
6474 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 6475 SelectionDAG &DAG) const { 6476 SDLoc dl(Op); 6477 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 6478 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 6479 6480 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { 6481 // We first build an i32 vector, load it into a QPX register, 6482 // then convert it to a floating-point vector and compare it 6483 // to a zero vector to get the boolean result. 6484 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 6485 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 6486 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); 6487 EVT PtrVT = getPointerTy(); 6488 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6489 6490 assert(BVN->getNumOperands() == 4 && 6491 "BUILD_VECTOR for v4i1 does not have 4 operands"); 6492 6493 bool IsConst = true; 6494 for (unsigned i = 0; i < 4; ++i) { 6495 if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue; 6496 if (!isa<ConstantSDNode>(BVN->getOperand(i))) { 6497 IsConst = false; 6498 break; 6499 } 6500 } 6501 6502 if (IsConst) { 6503 Constant *One = 6504 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); 6505 Constant *NegOne = 6506 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); 6507 6508 SmallVector<Constant*, 4> CV(4, NegOne); 6509 for (unsigned i = 0; i < 4; ++i) { 6510 if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) 6511 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); 6512 else if (cast<ConstantSDNode>(BVN->getOperand(i))-> 6513 getConstantIntValue()->isZero()) 6514 continue; 6515 else 6516 CV[i] = One; 6517 } 6518 6519 Constant *CP = ConstantVector::get(CV); 6520 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(), 6521 16 /* alignment */); 6522 6523 SmallVector<SDValue, 2> Ops; 6524 Ops.push_back(DAG.getEntryNode()); 6525 Ops.push_back(CPIdx); 6526 6527 SmallVector<EVT, 2> ValueVTs; 6528 ValueVTs.push_back(MVT::v4i1); 6529 ValueVTs.push_back(MVT::Other); // chain 6530 SDVTList VTs = DAG.getVTList(ValueVTs); 6531 6532 return DAG.getMemIntrinsicNode(PPCISD::QVLFSb, 6533 dl, VTs, Ops, MVT::v4f32, 6534 MachinePointerInfo::getConstantPool()); 6535 } 6536 6537 SmallVector<SDValue, 4> Stores; 6538 for (unsigned i = 0; i < 4; ++i) { 6539 if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue; 6540 6541 unsigned Offset = 4*i; 6542 SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType()); 6543 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 6544 6545 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); 6546 if (StoreSize > 4) { 6547 Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, 6548 BVN->getOperand(i), Idx, 6549 PtrInfo.getWithOffset(Offset), 6550 MVT::i32, false, false, 0)); 6551 } else { 6552 SDValue StoreValue = BVN->getOperand(i); 6553 if (StoreSize < 4) 6554 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); 6555 6556 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, 6557 StoreValue, Idx, 6558 PtrInfo.getWithOffset(Offset), 6559 false, false, 0)); 6560 } 6561 } 6562 6563 SDValue StoreChain; 6564 if (!Stores.empty()) 6565 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 6566 else 6567 StoreChain = DAG.getEntryNode(); 6568 6569 // Now load from v4i32 into the QPX register; this will extend it to 6570 // v4i64 but not yet convert it to a floating point. 
Nevertheless, this 6571 // is typed as v4f64 because the QPX register integer states are not 6572 // explicitly represented. 6573 6574 SmallVector<SDValue, 2> Ops; 6575 Ops.push_back(StoreChain); 6576 Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, MVT::i32)); 6577 Ops.push_back(FIdx); 6578 6579 SmallVector<EVT, 2> ValueVTs; 6580 ValueVTs.push_back(MVT::v4f64); 6581 ValueVTs.push_back(MVT::Other); // chain 6582 SDVTList VTs = DAG.getVTList(ValueVTs); 6583 6584 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, 6585 dl, VTs, Ops, MVT::v4i32, PtrInfo); 6586 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 6587 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, MVT::i32), 6588 LoadedVect); 6589 6590 SDValue FPZeros = DAG.getConstantFP(0.0, MVT::f64); 6591 FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, 6592 FPZeros, FPZeros, FPZeros, FPZeros); 6593 6594 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); 6595 } 6596 6597 // All other QPX vectors are handled by generic code. 6598 if (Subtarget.hasQPX()) 6599 return SDValue(); 6600 6601 // Check if this is a splat of a constant value. 6602 APInt APSplatBits, APSplatUndef; 6603 unsigned SplatBitSize; 6604 bool HasAnyUndefs; 6605 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 6606 HasAnyUndefs, 0, true) || SplatBitSize > 32) 6607 return SDValue(); 6608 6609 unsigned SplatBits = APSplatBits.getZExtValue(); 6610 unsigned SplatUndef = APSplatUndef.getZExtValue(); 6611 unsigned SplatSize = SplatBitSize / 8; 6612 6613 // First, handle single instruction cases. 6614 6615 // All zeros? 6616 if (SplatBits == 0) { 6617 // Canonicalize all zero vectors to be v4i32. 6618 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 6619 SDValue Z = DAG.getConstant(0, MVT::i32); 6620 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 6621 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 6622 } 6623 return Op; 6624 } 6625 6626 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 6627 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 6628 (32-SplatBitSize)); 6629 if (SextVal >= -16 && SextVal <= 15) 6630 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 6631 6632 6633 // Two instruction sequences. 6634 6635 // If this value is in the range [-32,30] and is even, use: 6636 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 6637 // If this value is in the range [17,31] and is odd, use: 6638 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 6639 // If this value is in the range [-31,-17] and is odd, use: 6640 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 6641 // Note the last two are three-instruction sequences. 6642 if (SextVal >= -32 && SextVal <= 31) { 6643 // To avoid having these optimizations undone by constant folding, 6644 // we convert to a pseudo that will be expanded later into one of 6645 // the above forms. 6646 SDValue Elt = DAG.getConstant(SextVal, MVT::i32); 6647 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 6648 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); 6649 SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32); 6650 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 6651 if (VT == Op.getValueType()) 6652 return RetVal; 6653 else 6654 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 6655 } 6656 6657 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 6658 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 6659 // for fneg/fabs. 
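  // Illustrative trace of the sequence below: vspltisw -1 makes each word
  // 0xFFFFFFFF; vslw shifts each word left by the low 5 bits of the matching
  // word of the shift operand (here 31), producing 0x80000000; XORing with the
  // all-ones vector then gives 0x7FFFFFFF in every word.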
6660 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 6661 // Make -1 and vspltisw -1: 6662 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 6663 6664 // Make the VSLW intrinsic, computing 0x8000_0000. 6665 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 6666 OnesV, DAG, dl); 6667 6668 // xor by OnesV to invert it. 6669 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 6670 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6671 } 6672 6673 // The remaining cases assume either big endian element order or 6674 // a splat-size that equates to the element size of the vector 6675 // to be built. An example that doesn't work for little endian is 6676 // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits 6677 // and a vector element size of 16 bits. The code below will 6678 // produce the vector in big endian element order, which for little 6679 // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. 6680 6681 // For now, just avoid these optimizations in that case. 6682 // FIXME: Develop correct optimizations for LE with mismatched 6683 // splat and element sizes. 6684 6685 if (Subtarget.isLittleEndian() && 6686 SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) 6687 return SDValue(); 6688 6689 // Check to see if this is a wide variety of vsplti*, binop self cases. 6690 static const signed char SplatCsts[] = { 6691 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 6692 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 6693 }; 6694 6695 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 6696 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 6697 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 6698 int i = SplatCsts[idx]; 6699 6700 // Figure out what shift amount will be used by altivec if shifted by i in 6701 // this splat size. 6702 unsigned TypeShiftAmt = i & (SplatBitSize-1); 6703 6704 // vsplti + shl self. 6705 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 6706 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6707 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6708 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 6709 Intrinsic::ppc_altivec_vslw 6710 }; 6711 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6712 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6713 } 6714 6715 // vsplti + srl self. 6716 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 6717 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6718 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6719 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 6720 Intrinsic::ppc_altivec_vsrw 6721 }; 6722 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6723 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6724 } 6725 6726 // vsplti + sra self. 6727 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 6728 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6729 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6730 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 6731 Intrinsic::ppc_altivec_vsraw 6732 }; 6733 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6734 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6735 } 6736 6737 // vsplti + rol self. 
6738 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 6739 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 6740 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6741 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6742 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 6743 Intrinsic::ppc_altivec_vrlw 6744 }; 6745 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6746 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6747 } 6748 6749 // t = vsplti c, result = vsldoi t, t, 1 6750 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 6751 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 6752 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 6753 } 6754 // t = vsplti c, result = vsldoi t, t, 2 6755 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 6756 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 6757 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 6758 } 6759 // t = vsplti c, result = vsldoi t, t, 3 6760 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 6761 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 6762 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 6763 } 6764 } 6765 6766 return SDValue(); 6767 } 6768 6769 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 6770 /// the specified operations to build the shuffle. 6771 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 6772 SDValue RHS, SelectionDAG &DAG, 6773 SDLoc dl) { 6774 unsigned OpNum = (PFEntry >> 26) & 0x0F; 6775 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 6776 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 6777 6778 enum { 6779 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 6780 OP_VMRGHW, 6781 OP_VMRGLW, 6782 OP_VSPLTISW0, 6783 OP_VSPLTISW1, 6784 OP_VSPLTISW2, 6785 OP_VSPLTISW3, 6786 OP_VSLDOI4, 6787 OP_VSLDOI8, 6788 OP_VSLDOI12 6789 }; 6790 6791 if (OpNum == OP_COPY) { 6792 if (LHSID == (1*9+2)*9+3) return LHS; 6793 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 6794 return RHS; 6795 } 6796 6797 SDValue OpLHS, OpRHS; 6798 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 6799 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 6800 6801 int ShufIdxs[16]; 6802 switch (OpNum) { 6803 default: llvm_unreachable("Unknown i32 permute!"); 6804 case OP_VMRGHW: 6805 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 6806 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 6807 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 6808 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 6809 break; 6810 case OP_VMRGLW: 6811 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 6812 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 6813 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 6814 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 6815 break; 6816 case OP_VSPLTISW0: 6817 for (unsigned i = 0; i != 16; ++i) 6818 ShufIdxs[i] = (i&3)+0; 6819 break; 6820 case OP_VSPLTISW1: 6821 for (unsigned i = 0; i != 16; ++i) 6822 ShufIdxs[i] = (i&3)+4; 6823 break; 6824 case OP_VSPLTISW2: 6825 for (unsigned i = 0; i != 16; ++i) 6826 ShufIdxs[i] = (i&3)+8; 6827 break; 6828 case OP_VSPLTISW3: 6829 for (unsigned i = 0; i != 16; ++i) 6830 
ShufIdxs[i] = (i&3)+12; 6831 break; 6832 case OP_VSLDOI4: 6833 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 6834 case OP_VSLDOI8: 6835 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 6836 case OP_VSLDOI12: 6837 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 6838 } 6839 EVT VT = OpLHS.getValueType(); 6840 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 6841 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 6842 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 6843 return DAG.getNode(ISD::BITCAST, dl, VT, T); 6844 } 6845 6846 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 6847 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 6848 /// return the code it can be lowered into. Worst case, it can always be 6849 /// lowered into a vperm. 6850 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 6851 SelectionDAG &DAG) const { 6852 SDLoc dl(Op); 6853 SDValue V1 = Op.getOperand(0); 6854 SDValue V2 = Op.getOperand(1); 6855 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 6856 EVT VT = Op.getValueType(); 6857 bool isLittleEndian = Subtarget.isLittleEndian(); 6858 6859 if (Subtarget.hasQPX()) { 6860 if (VT.getVectorNumElements() != 4) 6861 return SDValue(); 6862 6863 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 6864 6865 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); 6866 if (AlignIdx != -1) { 6867 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, 6868 DAG.getConstant(AlignIdx, MVT::i32)); 6869 } else if (SVOp->isSplat()) { 6870 int SplatIdx = SVOp->getSplatIndex(); 6871 if (SplatIdx >= 4) { 6872 std::swap(V1, V2); 6873 SplatIdx -= 4; 6874 } 6875 6876 // FIXME: If SplatIdx == 0 and the input came from a load, then there is 6877 // nothing to do. 6878 6879 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, 6880 DAG.getConstant(SplatIdx, MVT::i32)); 6881 } 6882 6883 // Lower this into a qvgpci/qvfperm pair. 6884 6885 // Compute the qvgpci literal 6886 unsigned idx = 0; 6887 for (unsigned i = 0; i < 4; ++i) { 6888 int m = SVOp->getMaskElt(i); 6889 unsigned mm = m >= 0 ? (unsigned) m : i; 6890 idx |= mm << (3-i)*3; 6891 } 6892 6893 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, 6894 DAG.getConstant(idx, MVT::i32)); 6895 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); 6896 } 6897 6898 // Cases that are handled by instructions that take permute immediates 6899 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 6900 // selected by the instruction selector. 6901 if (V2.getOpcode() == ISD::UNDEF) { 6902 if (PPC::isSplatShuffleMask(SVOp, 1) || 6903 PPC::isSplatShuffleMask(SVOp, 2) || 6904 PPC::isSplatShuffleMask(SVOp, 4) || 6905 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 6906 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 6907 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 6908 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 6909 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 6910 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 6911 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 6912 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 6913 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { 6914 return Op; 6915 } 6916 } 6917 6918 // Altivec has a variety of "shuffle immediates" that take two vector inputs 6919 // and produce a fixed permutation. If any of these match, do not lower to 6920 // VPERM. 6921 unsigned int ShuffleKind = isLittleEndian ? 
2 : 0; 6922 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 6923 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 6924 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 6925 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 6926 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 6927 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 6928 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 6929 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 6930 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) 6931 return Op; 6932 6933 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 6934 // perfect shuffle table to emit an optimal matching sequence. 6935 ArrayRef<int> PermMask = SVOp->getMask(); 6936 6937 unsigned PFIndexes[4]; 6938 bool isFourElementShuffle = true; 6939 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 6940 unsigned EltNo = 8; // Start out undef. 6941 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 6942 if (PermMask[i*4+j] < 0) 6943 continue; // Undef, ignore it. 6944 6945 unsigned ByteSource = PermMask[i*4+j]; 6946 if ((ByteSource & 3) != j) { 6947 isFourElementShuffle = false; 6948 break; 6949 } 6950 6951 if (EltNo == 8) { 6952 EltNo = ByteSource/4; 6953 } else if (EltNo != ByteSource/4) { 6954 isFourElementShuffle = false; 6955 break; 6956 } 6957 } 6958 PFIndexes[i] = EltNo; 6959 } 6960 6961 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 6962 // perfect shuffle vector to determine if it is cost effective to do this as 6963 // discrete instructions, or whether we should use a vperm. 6964 // For now, we skip this for little endian until such time as we have a 6965 // little-endian perfect shuffle table. 6966 if (isFourElementShuffle && !isLittleEndian) { 6967 // Compute the index in the perfect shuffle table. 6968 unsigned PFTableIndex = 6969 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 6970 6971 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 6972 unsigned Cost = (PFEntry >> 30); 6973 6974 // Determining when to avoid vperm is tricky. Many things affect the cost 6975 // of vperm, particularly how many times the perm mask needs to be computed. 6976 // For example, if the perm mask can be hoisted out of a loop or is already 6977 // used (perhaps because there are multiple permutes with the same shuffle 6978 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 6979 // the loop requires an extra register. 6980 // 6981 // As a compromise, we only emit discrete instructions if the shuffle can be 6982 // generated in 3 or fewer operations. When we have loop information 6983 // available, if this block is within a loop, we should avoid using vperm 6984 // for 3-operation perms and use a constant pool load instead. 6985 if (Cost < 3) 6986 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 6987 } 6988 6989 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 6990 // vector that will get spilled to the constant pool. 6991 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 6992 6993 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 6994 // that it is in input element units, not in bytes. Convert now. 6995 6996 // For little endian, the order of the input vectors is reversed, and 6997 // the permutation mask is complemented with respect to 31. This is 6998 // necessary to produce proper semantics with the big-endian-biased vperm 6999 // instruction. 
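  // Illustrative example: to route byte j of source element SrcElt into the
  // result, big endian uses mask entry SrcElt*BytesPerElement + j; for little
  // endian the loop below emits 31 - (SrcElt*BytesPerElement + j) and the
  // operands are passed as (V2, V1), compensating for vperm's big-endian byte
  // numbering.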
7000 EVT EltVT = V1.getValueType().getVectorElementType(); 7001 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 7002 7003 SmallVector<SDValue, 16> ResultMask; 7004 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 7005 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 7006 7007 for (unsigned j = 0; j != BytesPerElement; ++j) 7008 if (isLittleEndian) 7009 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j), 7010 MVT::i32)); 7011 else 7012 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 7013 MVT::i32)); 7014 } 7015 7016 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 7017 ResultMask); 7018 if (isLittleEndian) 7019 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7020 V2, V1, VPermMask); 7021 else 7022 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7023 V1, V2, VPermMask); 7024 } 7025 7026 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 7027 /// altivec comparison. If it is, return true and fill in Opc/isDot with 7028 /// information about the intrinsic. 7029 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 7030 bool &isDot) { 7031 unsigned IntrinsicID = 7032 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 7033 CompareOpc = -1; 7034 isDot = false; 7035 switch (IntrinsicID) { 7036 default: return false; 7037 // Comparison predicates. 7038 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 7039 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 7040 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 7041 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 7042 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 7043 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 7044 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 7045 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 7046 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 7047 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 7048 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 7049 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 7050 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 7051 7052 // Normal Comparisons. 
7053 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 7054 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 7055 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 7056 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 7057 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 7058 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 7059 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 7060 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 7061 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 7062 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 7063 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 7064 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 7065 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 7066 } 7067 return true; 7068 } 7069 7070 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 7071 /// lower, do it, otherwise return null. 7072 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 7073 SelectionDAG &DAG) const { 7074 // If this is a lowered altivec predicate compare, CompareOpc is set to the 7075 // opcode number of the comparison. 7076 SDLoc dl(Op); 7077 int CompareOpc; 7078 bool isDot; 7079 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 7080 return SDValue(); // Don't custom lower most intrinsics. 7081 7082 // If this is a non-dot comparison, make the VCMP node and we are done. 7083 if (!isDot) { 7084 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 7085 Op.getOperand(1), Op.getOperand(2), 7086 DAG.getConstant(CompareOpc, MVT::i32)); 7087 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 7088 } 7089 7090 // Create the PPCISD altivec 'dot' comparison node. 7091 SDValue Ops[] = { 7092 Op.getOperand(2), // LHS 7093 Op.getOperand(3), // RHS 7094 DAG.getConstant(CompareOpc, MVT::i32) 7095 }; 7096 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 7097 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 7098 7099 // Now that we have the comparison, emit a copy from the CR to a GPR. 7100 // This is flagged to the above dot comparison. 7101 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 7102 DAG.getRegister(PPC::CR6, MVT::i32), 7103 CompNode.getValue(1)); 7104 7105 // Unpack the result based on how the target uses it. 7106 unsigned BitNo; // Bit # of CR6. 7107 bool InvertBit; // Invert result? 7108 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 7109 default: // Can't happen, don't crash on invalid number though. 7110 case 0: // Return the value of the EQ bit of CR6. 7111 BitNo = 0; InvertBit = false; 7112 break; 7113 case 1: // Return the inverted value of the EQ bit of CR6. 7114 BitNo = 0; InvertBit = true; 7115 break; 7116 case 2: // Return the value of the LT bit of CR6. 7117 BitNo = 2; InvertBit = false; 7118 break; 7119 case 3: // Return the inverted value of the LT bit of CR6. 7120 BitNo = 2; InvertBit = true; 7121 break; 7122 } 7123 7124 // Shift the bit into the low position. 7125 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 7126 DAG.getConstant(8-(3-BitNo), MVT::i32)); 7127 // Isolate the bit. 7128 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 7129 DAG.getConstant(1, MVT::i32)); 7130 7131 // If we are supposed to, toggle the bit. 
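  // For example, predicate value 1 above requests the inverted EQ bit, so the
  // isolated bit is flipped with an XOR against 1.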
7132 if (InvertBit) 7133 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 7134 DAG.getConstant(1, MVT::i32)); 7135 return Flags; 7136 } 7137 7138 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 7139 SelectionDAG &DAG) const { 7140 SDLoc dl(Op); 7141 // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int 7142 // instructions), but for smaller types, we need to first extend up to v2i32 7143 // before doing going farther. 7144 if (Op.getValueType() == MVT::v2i64) { 7145 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 7146 if (ExtVT != MVT::v2i32) { 7147 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); 7148 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, 7149 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), 7150 ExtVT.getVectorElementType(), 4))); 7151 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); 7152 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, 7153 DAG.getValueType(MVT::v2i32)); 7154 } 7155 7156 return Op; 7157 } 7158 7159 return SDValue(); 7160 } 7161 7162 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 7163 SelectionDAG &DAG) const { 7164 SDLoc dl(Op); 7165 // Create a stack slot that is 16-byte aligned. 7166 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 7167 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 7168 EVT PtrVT = getPointerTy(); 7169 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7170 7171 // Store the input value into Value#0 of the stack slot. 7172 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 7173 Op.getOperand(0), FIdx, MachinePointerInfo(), 7174 false, false, 0); 7175 // Load it out. 7176 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 7177 false, false, false, 0); 7178 } 7179 7180 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 7181 SelectionDAG &DAG) const { 7182 SDLoc dl(Op); 7183 SDNode *N = Op.getNode(); 7184 7185 assert(N->getOperand(0).getValueType() == MVT::v4i1 && 7186 "Unknown extract_vector_elt type"); 7187 7188 SDValue Value = N->getOperand(0); 7189 7190 // The first part of this is like the store lowering except that we don't 7191 // need to track the chain. 7192 7193 // The values are now known to be -1 (false) or 1 (true). To convert this 7194 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 7195 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 7196 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 7197 7198 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 7199 // understand how to form the extending load. 7200 SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64); 7201 FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, 7202 FPHalfs, FPHalfs, FPHalfs, FPHalfs); 7203 7204 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 7205 7206 // Now convert to an integer and store. 
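  // Sketch of the remaining steps (assuming qvfctiwu converts each lane to an
  // unsigned integer word): the 0.0/1.0 lanes become the words 0/1, qvstfiw
  // spills them to a 16-byte stack slot, and the requested lane is re-loaded
  // as a single i32 from byte offset 4 * element-index.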
7207 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7208 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32), 7209 Value); 7210 7211 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 7212 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 7213 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); 7214 EVT PtrVT = getPointerTy(); 7215 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7216 7217 SDValue StoreChain = DAG.getEntryNode(); 7218 SmallVector<SDValue, 2> Ops; 7219 Ops.push_back(StoreChain); 7220 Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32)); 7221 Ops.push_back(Value); 7222 Ops.push_back(FIdx); 7223 7224 SmallVector<EVT, 2> ValueVTs; 7225 ValueVTs.push_back(MVT::Other); // chain 7226 SDVTList VTs = DAG.getVTList(ValueVTs); 7227 7228 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 7229 dl, VTs, Ops, MVT::v4i32, PtrInfo); 7230 7231 // Extract the value requested. 7232 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 7233 SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType()); 7234 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 7235 7236 SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, 7237 PtrInfo.getWithOffset(Offset), 7238 false, false, false, 0); 7239 7240 if (!Subtarget.useCRBits()) 7241 return IntVal; 7242 7243 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); 7244 } 7245 7246 /// Lowering for QPX v4i1 loads 7247 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, 7248 SelectionDAG &DAG) const { 7249 SDLoc dl(Op); 7250 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); 7251 SDValue LoadChain = LN->getChain(); 7252 SDValue BasePtr = LN->getBasePtr(); 7253 7254 if (Op.getValueType() == MVT::v4f64 || 7255 Op.getValueType() == MVT::v4f32) { 7256 EVT MemVT = LN->getMemoryVT(); 7257 unsigned Alignment = LN->getAlignment(); 7258 7259 // If this load is properly aligned, then it is legal. 
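    // Otherwise (sketch of the loop below), the vector is loaded as four
    // scalar loads at BasePtr + Idx*Stride, using an extending load whenever
    // the in-memory scalar type is narrower than the value type; the load
    // chains are merged with a TokenFactor and the lanes reassembled with
    // BUILD_VECTOR.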
7260 if (Alignment >= MemVT.getStoreSize()) 7261 return Op; 7262 7263 EVT ScalarVT = Op.getValueType().getScalarType(), 7264 ScalarMemVT = MemVT.getScalarType(); 7265 unsigned Stride = ScalarMemVT.getStoreSize(); 7266 7267 SmallVector<SDValue, 8> Vals, LoadChains; 7268 for (unsigned Idx = 0; Idx < 4; ++Idx) { 7269 SDValue Load; 7270 if (ScalarVT != ScalarMemVT) 7271 Load = 7272 DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, 7273 BasePtr, 7274 LN->getPointerInfo().getWithOffset(Idx*Stride), 7275 ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(), 7276 LN->isInvariant(), MinAlign(Alignment, Idx*Stride), 7277 LN->getAAInfo()); 7278 else 7279 Load = 7280 DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, 7281 LN->getPointerInfo().getWithOffset(Idx*Stride), 7282 LN->isVolatile(), LN->isNonTemporal(), 7283 LN->isInvariant(), MinAlign(Alignment, Idx*Stride), 7284 LN->getAAInfo()); 7285 7286 if (Idx == 0 && LN->isIndexed()) { 7287 assert(LN->getAddressingMode() == ISD::PRE_INC && 7288 "Unknown addressing mode on vector load"); 7289 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), 7290 LN->getAddressingMode()); 7291 } 7292 7293 Vals.push_back(Load); 7294 LoadChains.push_back(Load.getValue(1)); 7295 7296 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 7297 DAG.getConstant(Stride, BasePtr.getValueType())); 7298 } 7299 7300 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 7301 SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, 7302 Op.getValueType(), Vals); 7303 7304 if (LN->isIndexed()) { 7305 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; 7306 return DAG.getMergeValues(RetOps, dl); 7307 } 7308 7309 SDValue RetOps[] = { Value, TF }; 7310 return DAG.getMergeValues(RetOps, dl); 7311 } 7312 7313 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); 7314 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); 7315 7316 // To lower v4i1 from a byte array, we load the byte elements of the 7317 // vector and then reuse the BUILD_VECTOR logic. 7318 7319 SmallVector<SDValue, 4> VectElmts, VectElmtChains; 7320 for (unsigned i = 0; i < 4; ++i) { 7321 SDValue Idx = DAG.getConstant(i, BasePtr.getValueType()); 7322 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 7323 7324 VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD, 7325 dl, MVT::i32, LoadChain, Idx, 7326 LN->getPointerInfo().getWithOffset(i), 7327 MVT::i8 /* memory type */, 7328 LN->isVolatile(), LN->isNonTemporal(), 7329 LN->isInvariant(), 7330 1 /* alignment */, LN->getAAInfo())); 7331 VectElmtChains.push_back(VectElmts[i].getValue(1)); 7332 } 7333 7334 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); 7335 SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts); 7336 7337 SDValue RVals[] = { Value, LoadChain }; 7338 return DAG.getMergeValues(RVals, dl); 7339 } 7340 7341 /// Lowering for QPX v4i1 stores 7342 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, 7343 SelectionDAG &DAG) const { 7344 SDLoc dl(Op); 7345 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode()); 7346 SDValue StoreChain = SN->getChain(); 7347 SDValue BasePtr = SN->getBasePtr(); 7348 SDValue Value = SN->getValue(); 7349 7350 if (Value.getValueType() == MVT::v4f64 || 7351 Value.getValueType() == MVT::v4f32) { 7352 EVT MemVT = SN->getMemoryVT(); 7353 unsigned Alignment = SN->getAlignment(); 7354 7355 // If this store is properly aligned, then it is legal. 
7356 if (Alignment >= MemVT.getStoreSize()) 7357 return Op; 7358 7359 EVT ScalarVT = Value.getValueType().getScalarType(), 7360 ScalarMemVT = MemVT.getScalarType(); 7361 unsigned Stride = ScalarMemVT.getStoreSize(); 7362 7363 SmallVector<SDValue, 8> Stores; 7364 for (unsigned Idx = 0; Idx < 4; ++Idx) { 7365 SDValue Ex = 7366 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, 7367 DAG.getConstant(Idx, getVectorIdxTy())); 7368 SDValue Store; 7369 if (ScalarVT != ScalarMemVT) 7370 Store = 7371 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, 7372 SN->getPointerInfo().getWithOffset(Idx*Stride), 7373 ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(), 7374 MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); 7375 else 7376 Store = 7377 DAG.getStore(StoreChain, dl, Ex, BasePtr, 7378 SN->getPointerInfo().getWithOffset(Idx*Stride), 7379 SN->isVolatile(), SN->isNonTemporal(), 7380 MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); 7381 7382 if (Idx == 0 && SN->isIndexed()) { 7383 assert(SN->getAddressingMode() == ISD::PRE_INC && 7384 "Unknown addressing mode on vector store"); 7385 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), 7386 SN->getAddressingMode()); 7387 } 7388 7389 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 7390 DAG.getConstant(Stride, BasePtr.getValueType())); 7391 Stores.push_back(Store); 7392 } 7393 7394 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 7395 7396 if (SN->isIndexed()) { 7397 SDValue RetOps[] = { TF, Stores[0].getValue(1) }; 7398 return DAG.getMergeValues(RetOps, dl); 7399 } 7400 7401 return TF; 7402 } 7403 7404 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); 7405 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); 7406 7407 // The values are now known to be -1 (false) or 1 (true). To convert this 7408 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 7409 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 7410 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 7411 7412 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 7413 // understand how to form the extending load. 7414 SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64); 7415 FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, 7416 FPHalfs, FPHalfs, FPHalfs, FPHalfs); 7417 7418 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 7419 7420 // Now convert to an integer and store. 7421 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7422 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32), 7423 Value); 7424 7425 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 7426 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 7427 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); 7428 EVT PtrVT = getPointerTy(); 7429 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7430 7431 SmallVector<SDValue, 2> Ops; 7432 Ops.push_back(StoreChain); 7433 Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32)); 7434 Ops.push_back(Value); 7435 Ops.push_back(FIdx); 7436 7437 SmallVector<EVT, 2> ValueVTs; 7438 ValueVTs.push_back(MVT::Other); // chain 7439 SDVTList VTs = DAG.getVTList(ValueVTs); 7440 7441 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 7442 dl, VTs, Ops, MVT::v4i32, PtrInfo); 7443 7444 // Move data into the byte array. 
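  // (The qvstfiw above wrote four 32-bit words, each now 0 or 1, into the
  // stack slot. The two loops below reload those words as i32s and then
  // truncating-store each one as a single byte at the store's original
  // address, producing the byte-array form of v4i1 that LowerVectorLoad reads
  // back with four extending i8 loads.)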
7445 SmallVector<SDValue, 4> Loads, LoadChains; 7446 for (unsigned i = 0; i < 4; ++i) { 7447 unsigned Offset = 4*i; 7448 SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType()); 7449 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 7450 7451 Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx, 7452 PtrInfo.getWithOffset(Offset), 7453 false, false, false, 0)); 7454 LoadChains.push_back(Loads[i].getValue(1)); 7455 } 7456 7457 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 7458 7459 SmallVector<SDValue, 4> Stores; 7460 for (unsigned i = 0; i < 4; ++i) { 7461 SDValue Idx = DAG.getConstant(i, BasePtr.getValueType()); 7462 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 7463 7464 Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx, 7465 SN->getPointerInfo().getWithOffset(i), 7466 MVT::i8 /* memory type */, 7467 SN->isNonTemporal(), SN->isVolatile(), 7468 1 /* alignment */, SN->getAAInfo())); 7469 } 7470 7471 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 7472 7473 return StoreChain; 7474 } 7475 7476 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 7477 SDLoc dl(Op); 7478 if (Op.getValueType() == MVT::v4i32) { 7479 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 7480 7481 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 7482 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 7483 7484 SDValue RHSSwap = // = vrlw RHS, 16 7485 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 7486 7487 // Shrinkify inputs to v8i16. 7488 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 7489 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 7490 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 7491 7492 // Low parts multiplied together, generating 32-bit results (we ignore the 7493 // top parts). 7494 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 7495 LHS, RHS, DAG, dl, MVT::v4i32); 7496 7497 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 7498 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 7499 // Shift the high parts up 16 bits. 7500 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 7501 Neg16, DAG, dl); 7502 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 7503 } else if (Op.getValueType() == MVT::v8i16) { 7504 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 7505 7506 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 7507 7508 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 7509 LHS, RHS, Zero, DAG, dl); 7510 } else if (Op.getValueType() == MVT::v16i8) { 7511 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 7512 bool isLittleEndian = Subtarget.isLittleEndian(); 7513 7514 // Multiply the even 8-bit parts, producing 16-bit sums. 7515 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 7516 LHS, RHS, DAG, dl, MVT::v8i16); 7517 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 7518 7519 // Multiply the odd 8-bit parts, producing 16-bit sums. 7520 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 7521 LHS, RHS, DAG, dl, MVT::v8i16); 7522 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 7523 7524 // Merge the results together. Because vmuleub and vmuloub are 7525 // instructions with a big-endian bias, we must reverse the 7526 // element numbering and reverse the meaning of "odd" and "even" 7527 // when generating little endian code. 
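    // Concretely, for big endian: vmuleub produces a v8i16 whose halfword j is
    // the full 16-bit product of byte elements 2j, and vmuloub does the same
    // for elements 2j+1. Viewed as v16i8, the low 8 bits of halfword j live in
    // byte 2j+1, so result byte 2j comes from EvenParts byte 2j+1, and result
    // byte 2j+1 comes from OddParts byte 2j+1 (plus 16 because OddParts is the
    // shuffle's second operand). That is the mask built below; for little
    // endian the byte offsets and the operand order are flipped.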
7528 int Ops[16]; 7529 for (unsigned i = 0; i != 8; ++i) { 7530 if (isLittleEndian) { 7531 Ops[i*2 ] = 2*i; 7532 Ops[i*2+1] = 2*i+16; 7533 } else { 7534 Ops[i*2 ] = 2*i+1; 7535 Ops[i*2+1] = 2*i+1+16; 7536 } 7537 } 7538 if (isLittleEndian) 7539 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); 7540 else 7541 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 7542 } else { 7543 llvm_unreachable("Unknown mul to lower!"); 7544 } 7545 } 7546 7547 /// LowerOperation - Provide custom lowering hooks for some operations. 7548 /// 7549 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 7550 switch (Op.getOpcode()) { 7551 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 7552 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 7553 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 7554 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 7555 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 7556 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 7557 case ISD::SETCC: return LowerSETCC(Op, DAG); 7558 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 7559 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 7560 case ISD::VASTART: 7561 return LowerVASTART(Op, DAG, Subtarget); 7562 7563 case ISD::VAARG: 7564 return LowerVAARG(Op, DAG, Subtarget); 7565 7566 case ISD::VACOPY: 7567 return LowerVACOPY(Op, DAG, Subtarget); 7568 7569 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); 7570 case ISD::DYNAMIC_STACKALLOC: 7571 return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); 7572 7573 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 7574 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 7575 7576 case ISD::LOAD: return LowerLOAD(Op, DAG); 7577 case ISD::STORE: return LowerSTORE(Op, DAG); 7578 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); 7579 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 7580 case ISD::FP_TO_UINT: 7581 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 7582 SDLoc(Op)); 7583 case ISD::UINT_TO_FP: 7584 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 7585 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 7586 7587 // Lower 64-bit shifts. 7588 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 7589 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 7590 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 7591 7592 // Vector-related lowering. 7593 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 7594 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 7595 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 7596 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 7597 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 7598 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 7599 case ISD::MUL: return LowerMUL(Op, DAG); 7600 7601 // For counter-based loop handling. 7602 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 7603 7604 // Frame & Return address. 
7605 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 7606 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 7607 } 7608 } 7609 7610 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 7611 SmallVectorImpl<SDValue>&Results, 7612 SelectionDAG &DAG) const { 7613 SDLoc dl(N); 7614 switch (N->getOpcode()) { 7615 default: 7616 llvm_unreachable("Do not know how to custom type legalize this operation!"); 7617 case ISD::READCYCLECOUNTER: { 7618 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 7619 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); 7620 7621 Results.push_back(RTB); 7622 Results.push_back(RTB.getValue(1)); 7623 Results.push_back(RTB.getValue(2)); 7624 break; 7625 } 7626 case ISD::INTRINSIC_W_CHAIN: { 7627 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 7628 Intrinsic::ppc_is_decremented_ctr_nonzero) 7629 break; 7630 7631 assert(N->getValueType(0) == MVT::i1 && 7632 "Unexpected result type for CTR decrement intrinsic"); 7633 EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); 7634 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 7635 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 7636 N->getOperand(1)); 7637 7638 Results.push_back(NewInt); 7639 Results.push_back(NewInt.getValue(1)); 7640 break; 7641 } 7642 case ISD::VAARG: { 7643 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) 7644 return; 7645 7646 EVT VT = N->getValueType(0); 7647 7648 if (VT == MVT::i64) { 7649 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget); 7650 7651 Results.push_back(NewNode); 7652 Results.push_back(NewNode.getValue(1)); 7653 } 7654 return; 7655 } 7656 case ISD::FP_ROUND_INREG: { 7657 assert(N->getValueType(0) == MVT::ppcf128); 7658 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 7659 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 7660 MVT::f64, N->getOperand(0), 7661 DAG.getIntPtrConstant(0)); 7662 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 7663 MVT::f64, N->getOperand(0), 7664 DAG.getIntPtrConstant(1)); 7665 7666 // Add the two halves of the long double in round-to-zero mode. 7667 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 7668 7669 // We know the low half is about to be thrown away, so just use something 7670 // convenient. 7671 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 7672 FPreg, FPreg)); 7673 return; 7674 } 7675 case ISD::FP_TO_SINT: 7676 // LowerFP_TO_INT() can only handle f32 and f64. 
7677 if (N->getOperand(0).getValueType() == MVT::ppcf128) 7678 return; 7679 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 7680 return; 7681 } 7682 } 7683 7684 7685 //===----------------------------------------------------------------------===// 7686 // Other Lowering Code 7687 //===----------------------------------------------------------------------===// 7688 7689 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { 7690 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 7691 Function *Func = Intrinsic::getDeclaration(M, Id); 7692 return Builder.CreateCall(Func); 7693 } 7694 7695 // The mappings for emitLeading/TrailingFence is taken from 7696 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html 7697 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 7698 AtomicOrdering Ord, bool IsStore, 7699 bool IsLoad) const { 7700 if (Ord == SequentiallyConsistent) 7701 return callIntrinsic(Builder, Intrinsic::ppc_sync); 7702 else if (isAtLeastRelease(Ord)) 7703 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 7704 else 7705 return nullptr; 7706 } 7707 7708 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 7709 AtomicOrdering Ord, bool IsStore, 7710 bool IsLoad) const { 7711 if (IsLoad && isAtLeastAcquire(Ord)) 7712 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 7713 // FIXME: this is too conservative, a dependent branch + isync is enough. 7714 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and 7715 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html 7716 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. 7717 else 7718 return nullptr; 7719 } 7720 7721 MachineBasicBlock * 7722 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 7723 bool is64bit, unsigned BinOpcode) const { 7724 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 7725 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 7726 7727 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7728 MachineFunction *F = BB->getParent(); 7729 MachineFunction::iterator It = BB; 7730 ++It; 7731 7732 unsigned dest = MI->getOperand(0).getReg(); 7733 unsigned ptrA = MI->getOperand(1).getReg(); 7734 unsigned ptrB = MI->getOperand(2).getReg(); 7735 unsigned incr = MI->getOperand(3).getReg(); 7736 DebugLoc dl = MI->getDebugLoc(); 7737 7738 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 7739 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7740 F->insert(It, loopMBB); 7741 F->insert(It, exitMBB); 7742 exitMBB->splice(exitMBB->begin(), BB, 7743 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7744 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7745 7746 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7747 unsigned TmpReg = (!BinOpcode) ? incr : 7748 RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass 7749 : &PPC::GPRCRegClass); 7750 7751 // thisMBB: 7752 // ... 7753 // fallthrough --> loopMBB 7754 BB->addSuccessor(loopMBB); 7755 7756 // loopMBB: 7757 // l[wd]arx dest, ptr 7758 // add r0, dest, incr 7759 // st[wd]cx. r0, ptr 7760 // bne- loopMBB 7761 // fallthrough --> exitMBB 7762 BB = loopMBB; 7763 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 7764 .addReg(ptrA).addReg(ptrB); 7765 if (BinOpcode) 7766 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 7767 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::STDCX : PPC::STWCX)) 7768 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 7769 BuildMI(BB, dl, TII->get(PPC::BCC)) 7770 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 7771 BB->addSuccessor(loopMBB); 7772 BB->addSuccessor(exitMBB); 7773 7774 // exitMBB: 7775 // ... 7776 BB = exitMBB; 7777 return BB; 7778 } 7779 7780 MachineBasicBlock * 7781 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 7782 MachineBasicBlock *BB, 7783 bool is8bit, // operation 7784 unsigned BinOpcode) const { 7785 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 7786 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 7787 // In 64 bit mode we have to use 64 bits for addresses, even though the 7788 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 7789 // registers without caring whether they're 32 or 64, but here we're 7790 // doing actual arithmetic on the addresses. 7791 bool is64bit = Subtarget.isPPC64(); 7792 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 7793 7794 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7795 MachineFunction *F = BB->getParent(); 7796 MachineFunction::iterator It = BB; 7797 ++It; 7798 7799 unsigned dest = MI->getOperand(0).getReg(); 7800 unsigned ptrA = MI->getOperand(1).getReg(); 7801 unsigned ptrB = MI->getOperand(2).getReg(); 7802 unsigned incr = MI->getOperand(3).getReg(); 7803 DebugLoc dl = MI->getDebugLoc(); 7804 7805 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 7806 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7807 F->insert(It, loopMBB); 7808 F->insert(It, exitMBB); 7809 exitMBB->splice(exitMBB->begin(), BB, 7810 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7811 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7812 7813 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7814 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 7815 : &PPC::GPRCRegClass; 7816 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 7817 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 7818 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 7819 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 7820 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 7821 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 7822 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 7823 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 7824 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 7825 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 7826 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 7827 unsigned Ptr1Reg; 7828 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 7829 7830 // thisMBB: 7831 // ... 7832 // fallthrough --> loopMBB 7833 BB->addSuccessor(loopMBB); 7834 7835 // The 4-byte load must be aligned, while a char or short may be 7836 // anywhere in the word. Hence all this nasty bookkeeping code. 7837 // add ptr1, ptrA, ptrB [copy if ptrA==0] 7838 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 7839 // xori shift, shift1, 24 [16] 7840 // rlwinm ptr, ptr1, 0, 0, 29 7841 // slw incr2, incr, shift 7842 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 7843 // slw mask, mask2, shift 7844 // loopMBB: 7845 // lwarx tmpDest, ptr 7846 // add tmp, tmpDest, incr2 7847 // andc tmp2, tmpDest, mask 7848 // and tmp3, tmp, mask 7849 // or tmp4, tmp3, tmp2 7850 // stwcx. 
tmp4, ptr 7851 // bne- loopMBB 7852 // fallthrough --> exitMBB 7853 // srw dest, tmpDest, shift 7854 if (ptrA != ZeroReg) { 7855 Ptr1Reg = RegInfo.createVirtualRegister(RC); 7856 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 7857 .addReg(ptrA).addReg(ptrB); 7858 } else { 7859 Ptr1Reg = ptrB; 7860 } 7861 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 7862 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 7863 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 7864 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 7865 if (is64bit) 7866 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 7867 .addReg(Ptr1Reg).addImm(0).addImm(61); 7868 else 7869 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 7870 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 7871 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 7872 .addReg(incr).addReg(ShiftReg); 7873 if (is8bit) 7874 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 7875 else { 7876 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 7877 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 7878 } 7879 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 7880 .addReg(Mask2Reg).addReg(ShiftReg); 7881 7882 BB = loopMBB; 7883 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 7884 .addReg(ZeroReg).addReg(PtrReg); 7885 if (BinOpcode) 7886 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 7887 .addReg(Incr2Reg).addReg(TmpDestReg); 7888 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 7889 .addReg(TmpDestReg).addReg(MaskReg); 7890 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 7891 .addReg(TmpReg).addReg(MaskReg); 7892 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 7893 .addReg(Tmp3Reg).addReg(Tmp2Reg); 7894 BuildMI(BB, dl, TII->get(PPC::STWCX)) 7895 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 7896 BuildMI(BB, dl, TII->get(PPC::BCC)) 7897 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 7898 BB->addSuccessor(loopMBB); 7899 BB->addSuccessor(exitMBB); 7900 7901 // exitMBB: 7902 // ... 
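  // As a concrete reading of the bookkeeping above for a byte operand, using
  // big-endian lane numbering: if the low two bits of the address are 0b01,
  // "rlwinm shift1, ptr1, 3, 27, 28" extracts them multiplied by 8 (giving 8),
  // and "xori shift, shift1, 24" turns that into 16, the left shift that moves
  // byte 1 of the word into bits 16..23. "rlwinm ptr, ptr1, 0, 0, 29" (rldicr
  // on 64-bit) clears those low bits to form the aligned word address, and the
  // 0xFF mask is shifted into the same lane. The srw emitted below shifts the
  // word returned by lwarx back down so that the lane's prior value ends up in
  // the low bits of dest, which is what an atomic read-modify-write returns.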
7903 BB = exitMBB; 7904 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 7905 .addReg(ShiftReg); 7906 return BB; 7907 } 7908 7909 llvm::MachineBasicBlock* 7910 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, 7911 MachineBasicBlock *MBB) const { 7912 DebugLoc DL = MI->getDebugLoc(); 7913 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 7914 7915 MachineFunction *MF = MBB->getParent(); 7916 MachineRegisterInfo &MRI = MF->getRegInfo(); 7917 7918 const BasicBlock *BB = MBB->getBasicBlock(); 7919 MachineFunction::iterator I = MBB; 7920 ++I; 7921 7922 // Memory Reference 7923 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 7924 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 7925 7926 unsigned DstReg = MI->getOperand(0).getReg(); 7927 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 7928 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 7929 unsigned mainDstReg = MRI.createVirtualRegister(RC); 7930 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 7931 7932 MVT PVT = getPointerTy(); 7933 assert((PVT == MVT::i64 || PVT == MVT::i32) && 7934 "Invalid Pointer Size!"); 7935 // For v = setjmp(buf), we generate 7936 // 7937 // thisMBB: 7938 // SjLjSetup mainMBB 7939 // bl mainMBB 7940 // v_restore = 1 7941 // b sinkMBB 7942 // 7943 // mainMBB: 7944 // buf[LabelOffset] = LR 7945 // v_main = 0 7946 // 7947 // sinkMBB: 7948 // v = phi(main, restore) 7949 // 7950 7951 MachineBasicBlock *thisMBB = MBB; 7952 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 7953 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 7954 MF->insert(I, mainMBB); 7955 MF->insert(I, sinkMBB); 7956 7957 MachineInstrBuilder MIB; 7958 7959 // Transfer the remainder of BB and its successor edges to sinkMBB. 7960 sinkMBB->splice(sinkMBB->begin(), MBB, 7961 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 7962 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 7963 7964 // Note that the structure of the jmp_buf used here is not compatible 7965 // with that used by libc, and is not designed to be. Specifically, it 7966 // stores only those 'reserved' registers that LLVM does not otherwise 7967 // understand how to spill. Also, by convention, by the time this 7968 // intrinsic is called, Clang has already stored the frame address in the 7969 // first slot of the buffer and stack address in the third. Following the 7970 // X86 target code, we'll store the jump address in the second slot. We also 7971 // need to save the TOC pointer (R2) to handle jumps between shared 7972 // libraries, and that will be stored in the fourth slot. The thread 7973 // identifier (R13) is not affected. 7974 7975 // thisMBB: 7976 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 7977 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 7978 const int64_t BPOffset = 4 * PVT.getStoreSize(); 7979 7980 // Prepare IP either in reg. 7981 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 7982 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 7983 unsigned BufReg = MI->getOperand(1).getReg(); 7984 7985 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { 7986 setUsesTOCBasePtr(*MBB->getParent()); 7987 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 7988 .addReg(PPC::X2) 7989 .addImm(TOCOffset) 7990 .addReg(BufReg); 7991 MIB.setMemRefs(MMOBegin, MMOEnd); 7992 } 7993 7994 // Naked functions never have a base pointer, and so we use r1. For all 7995 // other functions, this decision must be delayed until during PEI. 
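  // For reference, the slot assignments in this buffer (in units of the
  // pointer size, matching the offsets above): slot 0 holds the frame address
  // and slot 2 the stack pointer, both written by Clang before the intrinsic;
  // slot 1 receives the return label stored in mainMBB below; slot 3 holds the
  // TOC pointer on 64-bit SVR4; and slot 4 holds the base pointer stored
  // through BaseReg just below.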
7996 unsigned BaseReg; 7997 if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) 7998 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; 7999 else 8000 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; 8001 8002 MIB = BuildMI(*thisMBB, MI, DL, 8003 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) 8004 .addReg(BaseReg) 8005 .addImm(BPOffset) 8006 .addReg(BufReg); 8007 MIB.setMemRefs(MMOBegin, MMOEnd); 8008 8009 // Setup 8010 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 8011 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 8012 MIB.addRegMask(TRI->getNoPreservedMask()); 8013 8014 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 8015 8016 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 8017 .addMBB(mainMBB); 8018 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 8019 8020 thisMBB->addSuccessor(mainMBB, /* weight */ 0); 8021 thisMBB->addSuccessor(sinkMBB, /* weight */ 1); 8022 8023 // mainMBB: 8024 // mainDstReg = 0 8025 MIB = 8026 BuildMI(mainMBB, DL, 8027 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); 8028 8029 // Store IP 8030 if (Subtarget.isPPC64()) { 8031 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 8032 .addReg(LabelReg) 8033 .addImm(LabelOffset) 8034 .addReg(BufReg); 8035 } else { 8036 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 8037 .addReg(LabelReg) 8038 .addImm(LabelOffset) 8039 .addReg(BufReg); 8040 } 8041 8042 MIB.setMemRefs(MMOBegin, MMOEnd); 8043 8044 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 8045 mainMBB->addSuccessor(sinkMBB); 8046 8047 // sinkMBB: 8048 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 8049 TII->get(PPC::PHI), DstReg) 8050 .addReg(mainDstReg).addMBB(mainMBB) 8051 .addReg(restoreDstReg).addMBB(thisMBB); 8052 8053 MI->eraseFromParent(); 8054 return sinkMBB; 8055 } 8056 8057 MachineBasicBlock * 8058 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, 8059 MachineBasicBlock *MBB) const { 8060 DebugLoc DL = MI->getDebugLoc(); 8061 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8062 8063 MachineFunction *MF = MBB->getParent(); 8064 MachineRegisterInfo &MRI = MF->getRegInfo(); 8065 8066 // Memory Reference 8067 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 8068 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 8069 8070 MVT PVT = getPointerTy(); 8071 assert((PVT == MVT::i64 || PVT == MVT::i32) && 8072 "Invalid Pointer Size!"); 8073 8074 const TargetRegisterClass *RC = 8075 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 8076 unsigned Tmp = MRI.createVirtualRegister(RC); 8077 // Since FP is only updated here but NOT referenced, it's treated as GPR. 8078 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 8079 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 8080 unsigned BP = 8081 (PVT == MVT::i64) 8082 ? PPC::X30 8083 : (Subtarget.isSVR4ABI() && 8084 MF->getTarget().getRelocationModel() == Reloc::PIC_ 8085 ? PPC::R29 8086 : PPC::R30); 8087 8088 MachineInstrBuilder MIB; 8089 8090 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 8091 const int64_t SPOffset = 2 * PVT.getStoreSize(); 8092 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 8093 const int64_t BPOffset = 4 * PVT.getStoreSize(); 8094 8095 unsigned BufReg = MI->getOperand(0).getReg(); 8096 8097 // Reload FP (the jumped-to function may not have had a 8098 // frame pointer, and if so, then its r31 will be restored 8099 // as necessary). 
8100 if (PVT == MVT::i64) { 8101 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 8102 .addImm(0) 8103 .addReg(BufReg); 8104 } else { 8105 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 8106 .addImm(0) 8107 .addReg(BufReg); 8108 } 8109 MIB.setMemRefs(MMOBegin, MMOEnd); 8110 8111 // Reload IP 8112 if (PVT == MVT::i64) { 8113 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 8114 .addImm(LabelOffset) 8115 .addReg(BufReg); 8116 } else { 8117 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 8118 .addImm(LabelOffset) 8119 .addReg(BufReg); 8120 } 8121 MIB.setMemRefs(MMOBegin, MMOEnd); 8122 8123 // Reload SP 8124 if (PVT == MVT::i64) { 8125 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 8126 .addImm(SPOffset) 8127 .addReg(BufReg); 8128 } else { 8129 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 8130 .addImm(SPOffset) 8131 .addReg(BufReg); 8132 } 8133 MIB.setMemRefs(MMOBegin, MMOEnd); 8134 8135 // Reload BP 8136 if (PVT == MVT::i64) { 8137 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 8138 .addImm(BPOffset) 8139 .addReg(BufReg); 8140 } else { 8141 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 8142 .addImm(BPOffset) 8143 .addReg(BufReg); 8144 } 8145 MIB.setMemRefs(MMOBegin, MMOEnd); 8146 8147 // Reload TOC 8148 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 8149 setUsesTOCBasePtr(*MBB->getParent()); 8150 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 8151 .addImm(TOCOffset) 8152 .addReg(BufReg); 8153 8154 MIB.setMemRefs(MMOBegin, MMOEnd); 8155 } 8156 8157 // Jump 8158 BuildMI(*MBB, MI, DL, 8159 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 8160 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 8161 8162 MI->eraseFromParent(); 8163 return MBB; 8164 } 8165 8166 MachineBasicBlock * 8167 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 8168 MachineBasicBlock *BB) const { 8169 if (MI->getOpcode() == TargetOpcode::STACKMAP || 8170 MI->getOpcode() == TargetOpcode::PATCHPOINT) { 8171 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && 8172 MI->getOpcode() == TargetOpcode::PATCHPOINT) { 8173 // Call lowering should have added an r2 operand to indicate a dependence 8174 // on the TOC base pointer value. It can't however, because there is no 8175 // way to mark the dependence as implicit there, and so the stackmap code 8176 // will confuse it with a regular operand. Instead, add the dependence 8177 // here. 8178 setUsesTOCBasePtr(*BB->getParent()); 8179 MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); 8180 } 8181 8182 return emitPatchPoint(MI, BB); 8183 } 8184 8185 if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || 8186 MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { 8187 return emitEHSjLjSetJmp(MI, BB); 8188 } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || 8189 MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { 8190 return emitEHSjLjLongJmp(MI, BB); 8191 } 8192 8193 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8194 8195 // To "insert" these instructions we actually have to insert their 8196 // control-flow patterns. 
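  // For the SELECT_* pseudos handled below, that pattern is a small diamond:
  //
  //            thisMBB
  //           /       \
  //     copy0MBB       |     (conditional branch skips the false path)
  //           \       /
  //            sinkMBB       (a PHI merges the two values)
  //
  // ReadTB instead expands to a retry loop, and the atomic pseudos expand to
  // larx/stcx. retry loops.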
8197 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8198 MachineFunction::iterator It = BB; 8199 ++It; 8200 8201 MachineFunction *F = BB->getParent(); 8202 8203 if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || 8204 MI->getOpcode() == PPC::SELECT_CC_I8 || 8205 MI->getOpcode() == PPC::SELECT_I4 || 8206 MI->getOpcode() == PPC::SELECT_I8)) { 8207 SmallVector<MachineOperand, 2> Cond; 8208 if (MI->getOpcode() == PPC::SELECT_CC_I4 || 8209 MI->getOpcode() == PPC::SELECT_CC_I8) 8210 Cond.push_back(MI->getOperand(4)); 8211 else 8212 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 8213 Cond.push_back(MI->getOperand(1)); 8214 8215 DebugLoc dl = MI->getDebugLoc(); 8216 TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), 8217 Cond, MI->getOperand(2).getReg(), 8218 MI->getOperand(3).getReg()); 8219 } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || 8220 MI->getOpcode() == PPC::SELECT_CC_I8 || 8221 MI->getOpcode() == PPC::SELECT_CC_F4 || 8222 MI->getOpcode() == PPC::SELECT_CC_F8 || 8223 MI->getOpcode() == PPC::SELECT_CC_QFRC || 8224 MI->getOpcode() == PPC::SELECT_CC_QSRC || 8225 MI->getOpcode() == PPC::SELECT_CC_QBRC || 8226 MI->getOpcode() == PPC::SELECT_CC_VRRC || 8227 MI->getOpcode() == PPC::SELECT_CC_VSFRC || 8228 MI->getOpcode() == PPC::SELECT_CC_VSRC || 8229 MI->getOpcode() == PPC::SELECT_I4 || 8230 MI->getOpcode() == PPC::SELECT_I8 || 8231 MI->getOpcode() == PPC::SELECT_F4 || 8232 MI->getOpcode() == PPC::SELECT_F8 || 8233 MI->getOpcode() == PPC::SELECT_QFRC || 8234 MI->getOpcode() == PPC::SELECT_QSRC || 8235 MI->getOpcode() == PPC::SELECT_QBRC || 8236 MI->getOpcode() == PPC::SELECT_VRRC || 8237 MI->getOpcode() == PPC::SELECT_VSFRC || 8238 MI->getOpcode() == PPC::SELECT_VSRC) { 8239 // The incoming instruction knows the destination vreg to set, the 8240 // condition code register to branch on, the true/false values to 8241 // select between, and a branch opcode to use. 8242 8243 // thisMBB: 8244 // ... 8245 // TrueVal = ... 8246 // cmpTY ccX, r1, r2 8247 // bCC copy1MBB 8248 // fallthrough --> copy0MBB 8249 MachineBasicBlock *thisMBB = BB; 8250 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 8251 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 8252 DebugLoc dl = MI->getDebugLoc(); 8253 F->insert(It, copy0MBB); 8254 F->insert(It, sinkMBB); 8255 8256 // Transfer the remainder of BB and its successor edges to sinkMBB. 8257 sinkMBB->splice(sinkMBB->begin(), BB, 8258 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8259 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 8260 8261 // Next, add the true and fallthrough blocks as its successors. 8262 BB->addSuccessor(copy0MBB); 8263 BB->addSuccessor(sinkMBB); 8264 8265 if (MI->getOpcode() == PPC::SELECT_I4 || 8266 MI->getOpcode() == PPC::SELECT_I8 || 8267 MI->getOpcode() == PPC::SELECT_F4 || 8268 MI->getOpcode() == PPC::SELECT_F8 || 8269 MI->getOpcode() == PPC::SELECT_QFRC || 8270 MI->getOpcode() == PPC::SELECT_QSRC || 8271 MI->getOpcode() == PPC::SELECT_QBRC || 8272 MI->getOpcode() == PPC::SELECT_VRRC || 8273 MI->getOpcode() == PPC::SELECT_VSFRC || 8274 MI->getOpcode() == PPC::SELECT_VSRC) { 8275 BuildMI(BB, dl, TII->get(PPC::BC)) 8276 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 8277 } else { 8278 unsigned SelectPred = MI->getOperand(4).getImm(); 8279 BuildMI(BB, dl, TII->get(PPC::BCC)) 8280 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 8281 } 8282 8283 // copy0MBB: 8284 // %FalseValue = ... 
8285 // # fallthrough to sinkMBB 8286 BB = copy0MBB; 8287 8288 // Update machine-CFG edges 8289 BB->addSuccessor(sinkMBB); 8290 8291 // sinkMBB: 8292 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 8293 // ... 8294 BB = sinkMBB; 8295 BuildMI(*BB, BB->begin(), dl, 8296 TII->get(PPC::PHI), MI->getOperand(0).getReg()) 8297 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 8298 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 8299 } else if (MI->getOpcode() == PPC::ReadTB) { 8300 // To read the 64-bit time-base register on a 32-bit target, we read the 8301 // two halves. Should the counter have wrapped while it was being read, we 8302 // need to try again. 8303 // ... 8304 // readLoop: 8305 // mfspr Rx,TBU # load from TBU 8306 // mfspr Ry,TB # load from TB 8307 // mfspr Rz,TBU # load from TBU 8308 // cmpw crX,Rx,Rz # check if ‘old’=’new’ 8309 // bne readLoop # branch if they're not equal 8310 // ... 8311 8312 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); 8313 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 8314 DebugLoc dl = MI->getDebugLoc(); 8315 F->insert(It, readMBB); 8316 F->insert(It, sinkMBB); 8317 8318 // Transfer the remainder of BB and its successor edges to sinkMBB. 8319 sinkMBB->splice(sinkMBB->begin(), BB, 8320 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8321 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 8322 8323 BB->addSuccessor(readMBB); 8324 BB = readMBB; 8325 8326 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8327 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 8328 unsigned LoReg = MI->getOperand(0).getReg(); 8329 unsigned HiReg = MI->getOperand(1).getReg(); 8330 8331 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); 8332 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); 8333 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); 8334 8335 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 8336 8337 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) 8338 .addReg(HiReg).addReg(ReadAgainReg); 8339 BuildMI(BB, dl, TII->get(PPC::BCC)) 8340 .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); 8341 8342 BB->addSuccessor(readMBB); 8343 BB->addSuccessor(sinkMBB); 8344 } 8345 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 8346 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 8347 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 8348 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 8349 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 8350 BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); 8351 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 8352 BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); 8353 8354 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 8355 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 8356 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 8357 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 8358 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 8359 BB = EmitAtomicBinary(MI, BB, false, PPC::AND); 8360 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 8361 BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); 8362 8363 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 8364 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 8365 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 8366 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 8367 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 8368 BB = EmitAtomicBinary(MI, BB, false, PPC::OR); 8369 else if 
(MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 8370 BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); 8371 8372 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 8373 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 8374 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 8375 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 8376 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 8377 BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); 8378 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 8379 BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); 8380 8381 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 8382 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 8383 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 8384 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 8385 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 8386 BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); 8387 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 8388 BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); 8389 8390 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 8391 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 8392 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 8393 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 8394 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 8395 BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); 8396 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 8397 BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); 8398 8399 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 8400 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 8401 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 8402 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 8403 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 8404 BB = EmitAtomicBinary(MI, BB, false, 0); 8405 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 8406 BB = EmitAtomicBinary(MI, BB, true, 0); 8407 8408 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 8409 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { 8410 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 8411 8412 unsigned dest = MI->getOperand(0).getReg(); 8413 unsigned ptrA = MI->getOperand(1).getReg(); 8414 unsigned ptrB = MI->getOperand(2).getReg(); 8415 unsigned oldval = MI->getOperand(3).getReg(); 8416 unsigned newval = MI->getOperand(4).getReg(); 8417 DebugLoc dl = MI->getDebugLoc(); 8418 8419 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 8420 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 8421 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 8422 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8423 F->insert(It, loop1MBB); 8424 F->insert(It, loop2MBB); 8425 F->insert(It, midMBB); 8426 F->insert(It, exitMBB); 8427 exitMBB->splice(exitMBB->begin(), BB, 8428 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8429 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8430 8431 // thisMBB: 8432 // ... 8433 // fallthrough --> loopMBB 8434 BB->addSuccessor(loop1MBB); 8435 8436 // loop1MBB: 8437 // l[wd]arx dest, ptr 8438 // cmp[wd] dest, oldval 8439 // bne- midMBB 8440 // loop2MBB: 8441 // st[wd]cx. newval, ptr 8442 // bne- loopMBB 8443 // b exitBB 8444 // midMBB: 8445 // st[wd]cx. dest, ptr 8446 // exitBB: 8447 BB = loop1MBB; 8448 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 8449 .addReg(ptrA).addReg(ptrB); 8450 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::CMPD : PPC::CMPW), PPC::CR0) 8451 .addReg(oldval).addReg(dest); 8452 BuildMI(BB, dl, TII->get(PPC::BCC)) 8453 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 8454 BB->addSuccessor(loop2MBB); 8455 BB->addSuccessor(midMBB); 8456 8457 BB = loop2MBB; 8458 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 8459 .addReg(newval).addReg(ptrA).addReg(ptrB); 8460 BuildMI(BB, dl, TII->get(PPC::BCC)) 8461 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 8462 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 8463 BB->addSuccessor(loop1MBB); 8464 BB->addSuccessor(exitMBB); 8465 8466 BB = midMBB; 8467 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 8468 .addReg(dest).addReg(ptrA).addReg(ptrB); 8469 BB->addSuccessor(exitMBB); 8470 8471 // exitMBB: 8472 // ... 8473 BB = exitMBB; 8474 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 8475 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 8476 // We must use 64-bit registers for addresses when targeting 64-bit, 8477 // since we're actually doing arithmetic on them. Other registers 8478 // can be 32-bit. 8479 bool is64bit = Subtarget.isPPC64(); 8480 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 8481 8482 unsigned dest = MI->getOperand(0).getReg(); 8483 unsigned ptrA = MI->getOperand(1).getReg(); 8484 unsigned ptrB = MI->getOperand(2).getReg(); 8485 unsigned oldval = MI->getOperand(3).getReg(); 8486 unsigned newval = MI->getOperand(4).getReg(); 8487 DebugLoc dl = MI->getDebugLoc(); 8488 8489 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 8490 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 8491 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 8492 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8493 F->insert(It, loop1MBB); 8494 F->insert(It, loop2MBB); 8495 F->insert(It, midMBB); 8496 F->insert(It, exitMBB); 8497 exitMBB->splice(exitMBB->begin(), BB, 8498 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8499 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8500 8501 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8502 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 8503 : &PPC::GPRCRegClass; 8504 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 8505 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 8506 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 8507 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 8508 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 8509 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 8510 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 8511 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 8512 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 8513 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 8514 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 8515 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 8516 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 8517 unsigned Ptr1Reg; 8518 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 8519 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 8520 // thisMBB: 8521 // ... 8522 // fallthrough --> loopMBB 8523 BB->addSuccessor(loop1MBB); 8524 8525 // The 4-byte load must be aligned, while a char or short may be 8526 // anywhere in the word. Hence all this nasty bookkeeping code. 
8527 // add ptr1, ptrA, ptrB [copy if ptrA==0] 8528 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 8529 // xori shift, shift1, 24 [16] 8530 // rlwinm ptr, ptr1, 0, 0, 29 8531 // slw newval2, newval, shift 8532 // slw oldval2, oldval,shift 8533 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 8534 // slw mask, mask2, shift 8535 // and newval3, newval2, mask 8536 // and oldval3, oldval2, mask 8537 // loop1MBB: 8538 // lwarx tmpDest, ptr 8539 // and tmp, tmpDest, mask 8540 // cmpw tmp, oldval3 8541 // bne- midMBB 8542 // loop2MBB: 8543 // andc tmp2, tmpDest, mask 8544 // or tmp4, tmp2, newval3 8545 // stwcx. tmp4, ptr 8546 // bne- loop1MBB 8547 // b exitBB 8548 // midMBB: 8549 // stwcx. tmpDest, ptr 8550 // exitBB: 8551 // srw dest, tmpDest, shift 8552 if (ptrA != ZeroReg) { 8553 Ptr1Reg = RegInfo.createVirtualRegister(RC); 8554 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 8555 .addReg(ptrA).addReg(ptrB); 8556 } else { 8557 Ptr1Reg = ptrB; 8558 } 8559 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 8560 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 8561 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 8562 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 8563 if (is64bit) 8564 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 8565 .addReg(Ptr1Reg).addImm(0).addImm(61); 8566 else 8567 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 8568 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 8569 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 8570 .addReg(newval).addReg(ShiftReg); 8571 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 8572 .addReg(oldval).addReg(ShiftReg); 8573 if (is8bit) 8574 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 8575 else { 8576 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 8577 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 8578 .addReg(Mask3Reg).addImm(65535); 8579 } 8580 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 8581 .addReg(Mask2Reg).addReg(ShiftReg); 8582 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 8583 .addReg(NewVal2Reg).addReg(MaskReg); 8584 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 8585 .addReg(OldVal2Reg).addReg(MaskReg); 8586 8587 BB = loop1MBB; 8588 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 8589 .addReg(ZeroReg).addReg(PtrReg); 8590 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 8591 .addReg(TmpDestReg).addReg(MaskReg); 8592 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 8593 .addReg(TmpReg).addReg(OldVal3Reg); 8594 BuildMI(BB, dl, TII->get(PPC::BCC)) 8595 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 8596 BB->addSuccessor(loop2MBB); 8597 BB->addSuccessor(midMBB); 8598 8599 BB = loop2MBB; 8600 BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) 8601 .addReg(TmpDestReg).addReg(MaskReg); 8602 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 8603 .addReg(Tmp2Reg).addReg(NewVal3Reg); 8604 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 8605 .addReg(ZeroReg).addReg(PtrReg); 8606 BuildMI(BB, dl, TII->get(PPC::BCC)) 8607 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 8608 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 8609 BB->addSuccessor(loop1MBB); 8610 BB->addSuccessor(exitMBB); 8611 8612 BB = midMBB; 8613 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 8614 .addReg(ZeroReg).addReg(PtrReg); 8615 BB->addSuccessor(exitMBB); 8616 8617 // exitMBB: 8618 // ... 
8619 BB = exitMBB; 8620 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 8621 .addReg(ShiftReg); 8622 } else if (MI->getOpcode() == PPC::FADDrtz) { 8623 // This pseudo performs an FADD with rounding mode temporarily forced 8624 // to round-to-zero. We emit this via custom inserter since the FPSCR 8625 // is not modeled at the SelectionDAG level. 8626 unsigned Dest = MI->getOperand(0).getReg(); 8627 unsigned Src1 = MI->getOperand(1).getReg(); 8628 unsigned Src2 = MI->getOperand(2).getReg(); 8629 DebugLoc dl = MI->getDebugLoc(); 8630 8631 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8632 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 8633 8634 // Save FPSCR value. 8635 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 8636 8637 // Set rounding mode to round-to-zero. 8638 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 8639 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 8640 8641 // Perform addition. 8642 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 8643 8644 // Restore FPSCR value. 8645 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); 8646 } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 8647 MI->getOpcode() == PPC::ANDIo_1_GT_BIT || 8648 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 8649 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { 8650 unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 8651 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? 8652 PPC::ANDIo8 : PPC::ANDIo; 8653 bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 8654 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); 8655 8656 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8657 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 8658 &PPC::GPRCRegClass : 8659 &PPC::G8RCRegClass); 8660 8661 DebugLoc dl = MI->getDebugLoc(); 8662 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 8663 .addReg(MI->getOperand(1).getReg()).addImm(1); 8664 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 8665 MI->getOperand(0).getReg()) 8666 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 8667 } else { 8668 llvm_unreachable("Unexpected instr type to insert"); 8669 } 8670 8671 MI->eraseFromParent(); // The pseudo instruction is gone now. 8672 return BB; 8673 } 8674 8675 //===----------------------------------------------------------------------===// 8676 // Target Optimization Hooks 8677 //===----------------------------------------------------------------------===// 8678 8679 SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, 8680 DAGCombinerInfo &DCI, 8681 unsigned &RefinementSteps, 8682 bool &UseOneConstNR) const { 8683 EVT VT = Operand.getValueType(); 8684 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 8685 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 8686 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 8687 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 8688 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 8689 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 8690 // Convergence is quadratic, so we essentially double the number of digits 8691 // correct after every iteration. For both FRE and FRSQRTE, the minimum 8692 // architected relative accuracy is 2^-5. When hasRecipPrec(), this is 8693 // 2^-14. IEEE float has 23 digits and double has 52 digits. 8694 RefinementSteps = Subtarget.hasRecipPrec() ? 
                                                       1 : 3;
8695     if (VT.getScalarType() == MVT::f64)
8696       ++RefinementSteps;
8697     UseOneConstNR = true;
8698     return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
8699   }
8700   return SDValue();
8701 }
8702
8703 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
8704                                             DAGCombinerInfo &DCI,
8705                                             unsigned &RefinementSteps) const {
8706   EVT VT = Operand.getValueType();
8707   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
8708       (VT == MVT::f64 && Subtarget.hasFRE()) ||
8709       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
8710       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
8711       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
8712       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
8713     // Convergence is quadratic, so we essentially double the number of digits
8714     // correct after every iteration. For both FRE and FRSQRTE, the minimum
8715     // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
8716     // 2^-14. IEEE float has 23 digits and double has 52 digits.
8717     RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
8718     if (VT.getScalarType() == MVT::f64)
8719       ++RefinementSteps;
8720     return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
8721   }
8722   return SDValue();
8723 }
8724
8725 bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
8726   // Note: This functionality is used only when unsafe-fp-math is enabled, and
8727   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
8728   // enabled for division), this functionality is redundant with the default
8729   // combiner logic (once the division -> reciprocal/multiply transformation
8730   // has taken place). As a result, this matters more for older cores than for
8731   // newer ones.
8732
8733   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
8734   // reciprocal if there are two or more FDIVs (for embedded cores with only
8735   // one FP pipeline) or three or more FDIVs (for generic OOO cores).
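  // For example, given a repeated divisor d under unsafe-fp-math, "a/d; b/d;
  // c/d" has three uses of d: the generic answer below (NumUsers > 2) lets the
  // combiner rewrite it as "t = 1.0/d; a*t; b*t; c*t", while a two-use
  // sequence is left alone. The single-FP-pipeline cores listed make the same
  // trade already at two uses.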
8736 switch (Subtarget.getDarwinDirective()) { 8737 default: 8738 return NumUsers > 2; 8739 case PPC::DIR_440: 8740 case PPC::DIR_A2: 8741 case PPC::DIR_E500mc: 8742 case PPC::DIR_E5500: 8743 return NumUsers > 1; 8744 } 8745 } 8746 8747 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 8748 unsigned Bytes, int Dist, 8749 SelectionDAG &DAG) { 8750 if (VT.getSizeInBits() / 8 != Bytes) 8751 return false; 8752 8753 SDValue BaseLoc = Base->getBasePtr(); 8754 if (Loc.getOpcode() == ISD::FrameIndex) { 8755 if (BaseLoc.getOpcode() != ISD::FrameIndex) 8756 return false; 8757 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 8758 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 8759 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 8760 int FS = MFI->getObjectSize(FI); 8761 int BFS = MFI->getObjectSize(BFI); 8762 if (FS != BFS || FS != (int)Bytes) return false; 8763 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 8764 } 8765 8766 // Handle X+C 8767 if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && 8768 cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) 8769 return true; 8770 8771 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8772 const GlobalValue *GV1 = nullptr; 8773 const GlobalValue *GV2 = nullptr; 8774 int64_t Offset1 = 0; 8775 int64_t Offset2 = 0; 8776 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 8777 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 8778 if (isGA1 && isGA2 && GV1 == GV2) 8779 return Offset1 == (Offset2 + Dist*Bytes); 8780 return false; 8781 } 8782 8783 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 8784 // not enforce equality of the chain operands. 
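// For example, with Bytes == 16 and Dist == 1 this asks whether N accesses the
// 16 bytes immediately following Base's location; Dist == -1 asks about the 16
// bytes immediately preceding it.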
8785 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, 8786 unsigned Bytes, int Dist, 8787 SelectionDAG &DAG) { 8788 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { 8789 EVT VT = LS->getMemoryVT(); 8790 SDValue Loc = LS->getBasePtr(); 8791 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); 8792 } 8793 8794 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 8795 EVT VT; 8796 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 8797 default: return false; 8798 case Intrinsic::ppc_qpx_qvlfd: 8799 case Intrinsic::ppc_qpx_qvlfda: 8800 VT = MVT::v4f64; 8801 break; 8802 case Intrinsic::ppc_qpx_qvlfs: 8803 case Intrinsic::ppc_qpx_qvlfsa: 8804 VT = MVT::v4f32; 8805 break; 8806 case Intrinsic::ppc_qpx_qvlfcd: 8807 case Intrinsic::ppc_qpx_qvlfcda: 8808 VT = MVT::v2f64; 8809 break; 8810 case Intrinsic::ppc_qpx_qvlfcs: 8811 case Intrinsic::ppc_qpx_qvlfcsa: 8812 VT = MVT::v2f32; 8813 break; 8814 case Intrinsic::ppc_qpx_qvlfiwa: 8815 case Intrinsic::ppc_qpx_qvlfiwz: 8816 case Intrinsic::ppc_altivec_lvx: 8817 case Intrinsic::ppc_altivec_lvxl: 8818 case Intrinsic::ppc_vsx_lxvw4x: 8819 VT = MVT::v4i32; 8820 break; 8821 case Intrinsic::ppc_vsx_lxvd2x: 8822 VT = MVT::v2f64; 8823 break; 8824 case Intrinsic::ppc_altivec_lvebx: 8825 VT = MVT::i8; 8826 break; 8827 case Intrinsic::ppc_altivec_lvehx: 8828 VT = MVT::i16; 8829 break; 8830 case Intrinsic::ppc_altivec_lvewx: 8831 VT = MVT::i32; 8832 break; 8833 } 8834 8835 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); 8836 } 8837 8838 if (N->getOpcode() == ISD::INTRINSIC_VOID) { 8839 EVT VT; 8840 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 8841 default: return false; 8842 case Intrinsic::ppc_qpx_qvstfd: 8843 case Intrinsic::ppc_qpx_qvstfda: 8844 VT = MVT::v4f64; 8845 break; 8846 case Intrinsic::ppc_qpx_qvstfs: 8847 case Intrinsic::ppc_qpx_qvstfsa: 8848 VT = MVT::v4f32; 8849 break; 8850 case Intrinsic::ppc_qpx_qvstfcd: 8851 case Intrinsic::ppc_qpx_qvstfcda: 8852 VT = MVT::v2f64; 8853 break; 8854 case Intrinsic::ppc_qpx_qvstfcs: 8855 case Intrinsic::ppc_qpx_qvstfcsa: 8856 VT = MVT::v2f32; 8857 break; 8858 case Intrinsic::ppc_qpx_qvstfiw: 8859 case Intrinsic::ppc_qpx_qvstfiwa: 8860 case Intrinsic::ppc_altivec_stvx: 8861 case Intrinsic::ppc_altivec_stvxl: 8862 case Intrinsic::ppc_vsx_stxvw4x: 8863 VT = MVT::v4i32; 8864 break; 8865 case Intrinsic::ppc_vsx_stxvd2x: 8866 VT = MVT::v2f64; 8867 break; 8868 case Intrinsic::ppc_altivec_stvebx: 8869 VT = MVT::i8; 8870 break; 8871 case Intrinsic::ppc_altivec_stvehx: 8872 VT = MVT::i16; 8873 break; 8874 case Intrinsic::ppc_altivec_stvewx: 8875 VT = MVT::i32; 8876 break; 8877 } 8878 8879 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); 8880 } 8881 8882 return false; 8883 } 8884 8885 // Return true if there is a nearby consecutive load to the one provided 8886 // (regardless of alignment). We search up and down the chain, looking through 8887 // token factors and other loads (but nothing else). As a result, a true result 8888 // indicates that it is safe to create a new consecutive load adjacent to the 8889 // load provided. 8890 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 8891 SDValue Chain = LD->getChain(); 8892 EVT VT = LD->getMemoryVT(); 8893 8894 SmallSet<SDNode *, 16> LoadRoots; 8895 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 8896 SmallSet<SDNode *, 16> Visited; 8897 8898 // First, search up the chain, branching to follow all token-factor operands.
8899 // If we find a consecutive load, then we're done, otherwise, record all 8900 // nodes just above the top-level loads and token factors. 8901 while (!Queue.empty()) { 8902 SDNode *ChainNext = Queue.pop_back_val(); 8903 if (!Visited.insert(ChainNext).second) 8904 continue; 8905 8906 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { 8907 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 8908 return true; 8909 8910 if (!Visited.count(ChainLD->getChain().getNode())) 8911 Queue.push_back(ChainLD->getChain().getNode()); 8912 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 8913 for (const SDUse &O : ChainNext->ops()) 8914 if (!Visited.count(O.getNode())) 8915 Queue.push_back(O.getNode()); 8916 } else 8917 LoadRoots.insert(ChainNext); 8918 } 8919 8920 // Second, search down the chain, starting from the top-level nodes recorded 8921 // in the first phase. These top-level nodes are the nodes just above all 8922 // loads and token factors. Starting with their uses, recursively look through 8923 // all loads (just the chain uses) and token factors to find a consecutive 8924 // load. 8925 Visited.clear(); 8926 Queue.clear(); 8927 8928 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 8929 IE = LoadRoots.end(); I != IE; ++I) { 8930 Queue.push_back(*I); 8931 8932 while (!Queue.empty()) { 8933 SDNode *LoadRoot = Queue.pop_back_val(); 8934 if (!Visited.insert(LoadRoot).second) 8935 continue; 8936 8937 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) 8938 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 8939 return true; 8940 8941 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 8942 UE = LoadRoot->use_end(); UI != UE; ++UI) 8943 if (((isa<MemSDNode>(*UI) && 8944 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || 8945 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 8946 Queue.push_back(*UI); 8947 } 8948 } 8949 8950 return false; 8951 } 8952 8953 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 8954 DAGCombinerInfo &DCI) const { 8955 SelectionDAG &DAG = DCI.DAG; 8956 SDLoc dl(N); 8957 8958 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); 8959 // If we're tracking CR bits, we need to be careful that we don't have: 8960 // trunc(binary-ops(zext(x), zext(y))) 8961 // or 8962 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 8963 // such that we're unnecessarily moving things into GPRs when it would be 8964 // better to keep them in CR bits. 8965 8966 // Note that trunc here can be an actual i1 trunc, or can be the effective 8967 // truncation that comes from a setcc or select_cc. 8968 if (N->getOpcode() == ISD::TRUNCATE && 8969 N->getValueType(0) != MVT::i1) 8970 return SDValue(); 8971 8972 if (N->getOperand(0).getValueType() != MVT::i32 && 8973 N->getOperand(0).getValueType() != MVT::i64) 8974 return SDValue(); 8975 8976 if (N->getOpcode() == ISD::SETCC || 8977 N->getOpcode() == ISD::SELECT_CC) { 8978 // If we're looking at a comparison, then we need to make sure that the 8979 // high bits (all except for the first) don't affect the result. 8980 ISD::CondCode CC = 8981 cast<CondCodeSDNode>(N->getOperand( 8982 N->getOpcode() == ISD::SETCC ?
2 : 4))->get(); 8983 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 8984 8985 if (ISD::isSignedIntSetCC(CC)) { 8986 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 8987 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 8988 return SDValue(); 8989 } else if (ISD::isUnsignedIntSetCC(CC)) { 8990 if (!DAG.MaskedValueIsZero(N->getOperand(0), 8991 APInt::getHighBitsSet(OpBits, OpBits-1)) || 8992 !DAG.MaskedValueIsZero(N->getOperand(1), 8993 APInt::getHighBitsSet(OpBits, OpBits-1))) 8994 return SDValue(); 8995 } else { 8996 // This is neither a signed nor an unsigned comparison, just make sure 8997 // that the high bits are equal. 8998 APInt Op1Zero, Op1One; 8999 APInt Op2Zero, Op2One; 9000 DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); 9001 DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); 9002 9003 // We don't really care about what is known about the first bit (if 9004 // anything), so clear it in all masks prior to comparing them. 9005 Op1Zero.clearBit(0); Op1One.clearBit(0); 9006 Op2Zero.clearBit(0); Op2One.clearBit(0); 9007 9008 if (Op1Zero != Op2Zero || Op1One != Op2One) 9009 return SDValue(); 9010 } 9011 } 9012 9013 // We now know that the higher-order bits are irrelevant, we just need to 9014 // make sure that all of the intermediate operations are bit operations, and 9015 // all inputs are extensions. 9016 if (N->getOperand(0).getOpcode() != ISD::AND && 9017 N->getOperand(0).getOpcode() != ISD::OR && 9018 N->getOperand(0).getOpcode() != ISD::XOR && 9019 N->getOperand(0).getOpcode() != ISD::SELECT && 9020 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 9021 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 9022 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 9023 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 9024 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 9025 return SDValue(); 9026 9027 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 9028 N->getOperand(1).getOpcode() != ISD::AND && 9029 N->getOperand(1).getOpcode() != ISD::OR && 9030 N->getOperand(1).getOpcode() != ISD::XOR && 9031 N->getOperand(1).getOpcode() != ISD::SELECT && 9032 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 9033 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 9034 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 9035 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 9036 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 9037 return SDValue(); 9038 9039 SmallVector<SDValue, 4> Inputs; 9040 SmallVector<SDValue, 8> BinOps, PromOps; 9041 SmallPtrSet<SDNode *, 16> Visited; 9042 9043 for (unsigned i = 0; i < 2; ++i) { 9044 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 9045 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 9046 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 9047 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 9048 isa<ConstantSDNode>(N->getOperand(i))) 9049 Inputs.push_back(N->getOperand(i)); 9050 else 9051 BinOps.push_back(N->getOperand(i)); 9052 9053 if (N->getOpcode() == ISD::TRUNCATE) 9054 break; 9055 } 9056 9057 // Visit all inputs, collect all binary operations (and, or, xor and 9058 // select) that are all fed by extensions. 9059 while (!BinOps.empty()) { 9060 SDValue BinOp = BinOps.back(); 9061 BinOps.pop_back(); 9062 9063 if (!Visited.insert(BinOp.getNode()).second) 9064 continue; 9065 9066 PromOps.push_back(BinOp); 9067 9068 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 9069 // The condition of the select is not promoted. 
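// (Operand 0 of SELECT is the i1 condition; for SELECT_CC only operands 2 and
// 3 are the value operands, the first two being the values compared.)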
9070 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 9071 continue; 9072 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 9073 continue; 9074 9075 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 9076 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 9077 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 9078 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 9079 isa<ConstantSDNode>(BinOp.getOperand(i))) { 9080 Inputs.push_back(BinOp.getOperand(i)); 9081 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 9082 BinOp.getOperand(i).getOpcode() == ISD::OR || 9083 BinOp.getOperand(i).getOpcode() == ISD::XOR || 9084 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 9085 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 9086 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 9087 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 9088 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 9089 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 9090 BinOps.push_back(BinOp.getOperand(i)); 9091 } else { 9092 // We have an input that is not an extension or another binary 9093 // operation; we'll abort this transformation. 9094 return SDValue(); 9095 } 9096 } 9097 } 9098 9099 // Make sure that this is a self-contained cluster of operations (which 9100 // is not quite the same thing as saying that everything has only one 9101 // use). 9102 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 9103 if (isa<ConstantSDNode>(Inputs[i])) 9104 continue; 9105 9106 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 9107 UE = Inputs[i].getNode()->use_end(); 9108 UI != UE; ++UI) { 9109 SDNode *User = *UI; 9110 if (User != N && !Visited.count(User)) 9111 return SDValue(); 9112 9113 // Make sure that we're not going to promote the non-output-value 9114 // operand(s) or SELECT or SELECT_CC. 9115 // FIXME: Although we could sometimes handle this, and it does occur in 9116 // practice that one of the condition inputs to the select is also one of 9117 // the outputs, we currently can't deal with this. 9118 if (User->getOpcode() == ISD::SELECT) { 9119 if (User->getOperand(0) == Inputs[i]) 9120 return SDValue(); 9121 } else if (User->getOpcode() == ISD::SELECT_CC) { 9122 if (User->getOperand(0) == Inputs[i] || 9123 User->getOperand(1) == Inputs[i]) 9124 return SDValue(); 9125 } 9126 } 9127 } 9128 9129 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 9130 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 9131 UE = PromOps[i].getNode()->use_end(); 9132 UI != UE; ++UI) { 9133 SDNode *User = *UI; 9134 if (User != N && !Visited.count(User)) 9135 return SDValue(); 9136 9137 // Make sure that we're not going to promote the non-output-value 9138 // operand(s) or SELECT or SELECT_CC. 9139 // FIXME: Although we could sometimes handle this, and it does occur in 9140 // practice that one of the condition inputs to the select is also one of 9141 // the outputs, we currently can't deal with this. 9142 if (User->getOpcode() == ISD::SELECT) { 9143 if (User->getOperand(0) == PromOps[i]) 9144 return SDValue(); 9145 } else if (User->getOpcode() == ISD::SELECT_CC) { 9146 if (User->getOperand(0) == PromOps[i] || 9147 User->getOperand(1) == PromOps[i]) 9148 return SDValue(); 9149 } 9150 } 9151 } 9152 9153 // Replace all inputs with the extension operand. 
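// For example, an input zext(x:i1) is simply replaced by x itself; the bit
// operations that used the extended value are rebuilt below with an i1 result
// type, so the value never has to leave a CR bit.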
9154 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 9155 // Constants may have users outside the cluster of to-be-promoted nodes, 9156 // and so we need to replace those as we do the promotions. 9157 if (isa<ConstantSDNode>(Inputs[i])) 9158 continue; 9159 else 9160 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 9161 } 9162 9163 // Replace all operations (these are all the same, but have a different 9164 // (i1) return type). DAG.getNode will validate that the types of 9165 // a binary operator match, so go through the list in reverse so that 9166 // we've likely promoted both operands first. Any intermediate truncations or 9167 // extensions disappear. 9168 while (!PromOps.empty()) { 9169 SDValue PromOp = PromOps.back(); 9170 PromOps.pop_back(); 9171 9172 if (PromOp.getOpcode() == ISD::TRUNCATE || 9173 PromOp.getOpcode() == ISD::SIGN_EXTEND || 9174 PromOp.getOpcode() == ISD::ZERO_EXTEND || 9175 PromOp.getOpcode() == ISD::ANY_EXTEND) { 9176 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 9177 PromOp.getOperand(0).getValueType() != MVT::i1) { 9178 // The operand is not yet ready (see comment below). 9179 PromOps.insert(PromOps.begin(), PromOp); 9180 continue; 9181 } 9182 9183 SDValue RepValue = PromOp.getOperand(0); 9184 if (isa<ConstantSDNode>(RepValue)) 9185 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 9186 9187 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 9188 continue; 9189 } 9190 9191 unsigned C; 9192 switch (PromOp.getOpcode()) { 9193 default: C = 0; break; 9194 case ISD::SELECT: C = 1; break; 9195 case ISD::SELECT_CC: C = 2; break; 9196 } 9197 9198 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 9199 PromOp.getOperand(C).getValueType() != MVT::i1) || 9200 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 9201 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 9202 // The to-be-promoted operands of this node have not yet been 9203 // promoted (this should be rare because we're going through the 9204 // list backward, but if one of the operands has several users in 9205 // this cluster of to-be-promoted nodes, it is possible). 9206 PromOps.insert(PromOps.begin(), PromOp); 9207 continue; 9208 } 9209 9210 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 9211 PromOp.getNode()->op_end()); 9212 9213 // If there are any constant inputs, make sure they're replaced now. 9214 for (unsigned i = 0; i < 2; ++i) 9215 if (isa<ConstantSDNode>(Ops[C+i])) 9216 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 9217 9218 DAG.ReplaceAllUsesOfValueWith(PromOp, 9219 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 9220 } 9221 9222 // Now we're left with the initial truncation itself. 9223 if (N->getOpcode() == ISD::TRUNCATE) 9224 return N->getOperand(0); 9225 9226 // Otherwise, this is a comparison. The operands to be compared have just 9227 // changed type (to i1), but everything else is the same. 9228 return SDValue(N, 0); 9229 } 9230 9231 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 9232 DAGCombinerInfo &DCI) const { 9233 SelectionDAG &DAG = DCI.DAG; 9234 SDLoc dl(N); 9235 9236 // If we're tracking CR bits, we need to be careful that we don't have: 9237 // zext(binary-ops(trunc(x), trunc(y))) 9238 // or 9239 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 9240 // such that we're unnecessarily moving things into CR bits that can more 9241 // efficiently stay in GPRs. 
Note that if we're not certain that the high 9242 // bits are set as required by the final extension, we still may need to do 9243 // some masking to get the proper behavior. 9244 9245 // This same functionality is important on PPC64 when dealing with 9246 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 9247 // the return values of functions. Because it is so similar, it is handled 9248 // here as well. 9249 9250 if (N->getValueType(0) != MVT::i32 && 9251 N->getValueType(0) != MVT::i64) 9252 return SDValue(); 9253 9254 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || 9255 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) 9256 return SDValue(); 9257 9258 if (N->getOperand(0).getOpcode() != ISD::AND && 9259 N->getOperand(0).getOpcode() != ISD::OR && 9260 N->getOperand(0).getOpcode() != ISD::XOR && 9261 N->getOperand(0).getOpcode() != ISD::SELECT && 9262 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 9263 return SDValue(); 9264 9265 SmallVector<SDValue, 4> Inputs; 9266 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 9267 SmallPtrSet<SDNode *, 16> Visited; 9268 9269 // Visit all inputs, collect all binary operations (and, or, xor and 9270 // select) that are all fed by truncations. 9271 while (!BinOps.empty()) { 9272 SDValue BinOp = BinOps.back(); 9273 BinOps.pop_back(); 9274 9275 if (!Visited.insert(BinOp.getNode()).second) 9276 continue; 9277 9278 PromOps.push_back(BinOp); 9279 9280 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 9281 // The condition of the select is not promoted. 9282 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 9283 continue; 9284 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 9285 continue; 9286 9287 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 9288 isa<ConstantSDNode>(BinOp.getOperand(i))) { 9289 Inputs.push_back(BinOp.getOperand(i)); 9290 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 9291 BinOp.getOperand(i).getOpcode() == ISD::OR || 9292 BinOp.getOperand(i).getOpcode() == ISD::XOR || 9293 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 9294 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 9295 BinOps.push_back(BinOp.getOperand(i)); 9296 } else { 9297 // We have an input that is not a truncation or another binary 9298 // operation; we'll abort this transformation. 9299 return SDValue(); 9300 } 9301 } 9302 } 9303 9304 // The operands of a select that must be truncated when the select is 9305 // promoted because the operand is actually part of the to-be-promoted set. 9306 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 9307 9308 // Make sure that this is a self-contained cluster of operations (which 9309 // is not quite the same thing as saying that everything has only one 9310 // use). 9311 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 9312 if (isa<ConstantSDNode>(Inputs[i])) 9313 continue; 9314 9315 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 9316 UE = Inputs[i].getNode()->use_end(); 9317 UI != UE; ++UI) { 9318 SDNode *User = *UI; 9319 if (User != N && !Visited.count(User)) 9320 return SDValue(); 9321 9322 // If we're going to promote the non-output-value operand(s) or SELECT or 9323 // SELECT_CC, record them for truncation. 
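// (Unlike the trunc combine above, a condition operand that is also an input
// does not abort the transformation here; its original type is remembered in
// SelectTruncOp so a TRUNCATE can be re-inserted when the node is rebuilt.)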
9324 if (User->getOpcode() == ISD::SELECT) { 9325 if (User->getOperand(0) == Inputs[i]) 9326 SelectTruncOp[0].insert(std::make_pair(User, 9327 User->getOperand(0).getValueType())); 9328 } else if (User->getOpcode() == ISD::SELECT_CC) { 9329 if (User->getOperand(0) == Inputs[i]) 9330 SelectTruncOp[0].insert(std::make_pair(User, 9331 User->getOperand(0).getValueType())); 9332 if (User->getOperand(1) == Inputs[i]) 9333 SelectTruncOp[1].insert(std::make_pair(User, 9334 User->getOperand(1).getValueType())); 9335 } 9336 } 9337 } 9338 9339 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 9340 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 9341 UE = PromOps[i].getNode()->use_end(); 9342 UI != UE; ++UI) { 9343 SDNode *User = *UI; 9344 if (User != N && !Visited.count(User)) 9345 return SDValue(); 9346 9347 // If we're going to promote the non-output-value operand(s) or SELECT or 9348 // SELECT_CC, record them for truncation. 9349 if (User->getOpcode() == ISD::SELECT) { 9350 if (User->getOperand(0) == PromOps[i]) 9351 SelectTruncOp[0].insert(std::make_pair(User, 9352 User->getOperand(0).getValueType())); 9353 } else if (User->getOpcode() == ISD::SELECT_CC) { 9354 if (User->getOperand(0) == PromOps[i]) 9355 SelectTruncOp[0].insert(std::make_pair(User, 9356 User->getOperand(0).getValueType())); 9357 if (User->getOperand(1) == PromOps[i]) 9358 SelectTruncOp[1].insert(std::make_pair(User, 9359 User->getOperand(1).getValueType())); 9360 } 9361 } 9362 } 9363 9364 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 9365 bool ReallyNeedsExt = false; 9366 if (N->getOpcode() != ISD::ANY_EXTEND) { 9367 // If all of the inputs are not already sign/zero extended, then 9368 // we'll still need to do that at the end. 9369 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 9370 if (isa<ConstantSDNode>(Inputs[i])) 9371 continue; 9372 9373 unsigned OpBits = 9374 Inputs[i].getOperand(0).getValueSizeInBits(); 9375 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 9376 9377 if ((N->getOpcode() == ISD::ZERO_EXTEND && 9378 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 9379 APInt::getHighBitsSet(OpBits, 9380 OpBits-PromBits))) || 9381 (N->getOpcode() == ISD::SIGN_EXTEND && 9382 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 9383 (OpBits-(PromBits-1)))) { 9384 ReallyNeedsExt = true; 9385 break; 9386 } 9387 } 9388 } 9389 9390 // Replace all inputs, either with the truncation operand, or a 9391 // truncation or extension to the final output type. 9392 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 9393 // Constant inputs need to be replaced with the to-be-promoted nodes that 9394 // use them because they might have users outside of the cluster of 9395 // promoted nodes. 9396 if (isa<ConstantSDNode>(Inputs[i])) 9397 continue; 9398 9399 SDValue InSrc = Inputs[i].getOperand(0); 9400 if (Inputs[i].getValueType() == N->getValueType(0)) 9401 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 9402 else if (N->getOpcode() == ISD::SIGN_EXTEND) 9403 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 9404 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 9405 else if (N->getOpcode() == ISD::ZERO_EXTEND) 9406 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 9407 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 9408 else 9409 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 9410 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 9411 } 9412 9413 // Replace all operations (these are all the same, but have a different 9414 // (promoted) return type). 
DAG.getNode will validate that the types of 9415 // a binary operator match, so go through the list in reverse so that 9416 // we've likely promoted both operands first. 9417 while (!PromOps.empty()) { 9418 SDValue PromOp = PromOps.back(); 9419 PromOps.pop_back(); 9420 9421 unsigned C; 9422 switch (PromOp.getOpcode()) { 9423 default: C = 0; break; 9424 case ISD::SELECT: C = 1; break; 9425 case ISD::SELECT_CC: C = 2; break; 9426 } 9427 9428 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 9429 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 9430 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 9431 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 9432 // The to-be-promoted operands of this node have not yet been 9433 // promoted (this should be rare because we're going through the 9434 // list backward, but if one of the operands has several users in 9435 // this cluster of to-be-promoted nodes, it is possible). 9436 PromOps.insert(PromOps.begin(), PromOp); 9437 continue; 9438 } 9439 9440 // For SELECT and SELECT_CC nodes, we do a similar check for any 9441 // to-be-promoted comparison inputs. 9442 if (PromOp.getOpcode() == ISD::SELECT || 9443 PromOp.getOpcode() == ISD::SELECT_CC) { 9444 if ((SelectTruncOp[0].count(PromOp.getNode()) && 9445 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 9446 (SelectTruncOp[1].count(PromOp.getNode()) && 9447 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 9448 PromOps.insert(PromOps.begin(), PromOp); 9449 continue; 9450 } 9451 } 9452 9453 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 9454 PromOp.getNode()->op_end()); 9455 9456 // If this node has constant inputs, then they'll need to be promoted here. 9457 for (unsigned i = 0; i < 2; ++i) { 9458 if (!isa<ConstantSDNode>(Ops[C+i])) 9459 continue; 9460 if (Ops[C+i].getValueType() == N->getValueType(0)) 9461 continue; 9462 9463 if (N->getOpcode() == ISD::SIGN_EXTEND) 9464 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 9465 else if (N->getOpcode() == ISD::ZERO_EXTEND) 9466 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 9467 else 9468 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 9469 } 9470 9471 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 9472 // truncate them again to the original value type. 9473 if (PromOp.getOpcode() == ISD::SELECT || 9474 PromOp.getOpcode() == ISD::SELECT_CC) { 9475 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 9476 if (SI0 != SelectTruncOp[0].end()) 9477 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 9478 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 9479 if (SI1 != SelectTruncOp[1].end()) 9480 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 9481 } 9482 9483 DAG.ReplaceAllUsesOfValueWith(PromOp, 9484 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 9485 } 9486 9487 // Now we're left with the initial extension itself. 9488 if (!ReallyNeedsExt) 9489 return N->getOperand(0); 9490 9491 // To zero extend, just mask off everything except for the first bit (in the 9492 // i1 case). 
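// For example, when the promoted value was an i1 this is just (and x, 1); in
// general the mask keeps the low PromBits bits. The sign-extend case below
// uses the usual shl/sra-by-(bits - PromBits) pair instead.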
9493 if (N->getOpcode() == ISD::ZERO_EXTEND) 9494 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), 9495 DAG.getConstant(APInt::getLowBitsSet( 9496 N->getValueSizeInBits(0), PromBits), 9497 N->getValueType(0))); 9498 9499 assert(N->getOpcode() == ISD::SIGN_EXTEND && 9500 "Invalid extension type"); 9501 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); 9502 SDValue ShiftCst = 9503 DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy); 9504 return DAG.getNode(ISD::SRA, dl, N->getValueType(0), 9505 DAG.getNode(ISD::SHL, dl, N->getValueType(0), 9506 N->getOperand(0), ShiftCst), ShiftCst); 9507 } 9508 9509 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, 9510 DAGCombinerInfo &DCI) const { 9511 assert((N->getOpcode() == ISD::SINT_TO_FP || 9512 N->getOpcode() == ISD::UINT_TO_FP) && 9513 "Need an int -> FP conversion node here"); 9514 9515 if (!Subtarget.has64BitSupport()) 9516 return SDValue(); 9517 9518 SelectionDAG &DAG = DCI.DAG; 9519 SDLoc dl(N); 9520 SDValue Op(N, 0); 9521 9522 // Don't handle ppc_fp128 here or i1 conversions. 9523 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 9524 return SDValue(); 9525 if (Op.getOperand(0).getValueType() == MVT::i1) 9526 return SDValue(); 9527 9528 // For i32 intermediate values, unfortunately, the conversion functions 9529 // leave the upper 32 bits of the value are undefined. Within the set of 9530 // scalar instructions, we have no method for zero- or sign-extending the 9531 // value. Thus, we cannot handle i32 intermediate values here. 9532 if (Op.getOperand(0).getValueType() == MVT::i32) 9533 return SDValue(); 9534 9535 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 9536 "UINT_TO_FP is supported only with FPCVT"); 9537 9538 // If we have FCFIDS, then use it when converting to single-precision. 9539 // Otherwise, convert to double-precision and then round. 9540 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 9541 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 9542 : PPCISD::FCFIDS) 9543 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 9544 : PPCISD::FCFID); 9545 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 9546 ? MVT::f32 9547 : MVT::f64; 9548 9549 // If we're converting from a float, to an int, and back to a float again, 9550 // then we don't need the store/load pair at all. 9551 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && 9552 Subtarget.hasFPCVT()) || 9553 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { 9554 SDValue Src = Op.getOperand(0).getOperand(0); 9555 if (Src.getValueType() == MVT::f32) { 9556 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 9557 DCI.AddToWorklist(Src.getNode()); 9558 } 9559 9560 unsigned FCTOp = 9561 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 9562 PPCISD::FCTIDUZ; 9563 9564 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); 9565 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); 9566 9567 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { 9568 FP = DAG.getNode(ISD::FP_ROUND, dl, 9569 MVT::f32, FP, DAG.getIntPtrConstant(0)); 9570 DCI.AddToWorklist(FP.getNode()); 9571 } 9572 9573 return FP; 9574 } 9575 9576 return SDValue(); 9577 } 9578 9579 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for 9580 // builtins) into loads with swaps. 
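// On little-endian subtargets lxvd2x loads the two doublewords in big-endian
// (element-reversed) order, so an xxswapd must be emitted after the load to
// restore the expected little-endian element numbering.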
9581 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, 9582 DAGCombinerInfo &DCI) const { 9583 SelectionDAG &DAG = DCI.DAG; 9584 SDLoc dl(N); 9585 SDValue Chain; 9586 SDValue Base; 9587 MachineMemOperand *MMO; 9588 9589 switch (N->getOpcode()) { 9590 default: 9591 llvm_unreachable("Unexpected opcode for little endian VSX load"); 9592 case ISD::LOAD: { 9593 LoadSDNode *LD = cast<LoadSDNode>(N); 9594 Chain = LD->getChain(); 9595 Base = LD->getBasePtr(); 9596 MMO = LD->getMemOperand(); 9597 // If the MMO suggests this isn't a load of a full vector, leave 9598 // things alone. For a built-in, we have to make the change for 9599 // correctness, so if there is a size problem that will be a bug. 9600 if (MMO->getSize() < 16) 9601 return SDValue(); 9602 break; 9603 } 9604 case ISD::INTRINSIC_W_CHAIN: { 9605 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 9606 Chain = Intrin->getChain(); 9607 Base = Intrin->getBasePtr(); 9608 MMO = Intrin->getMemOperand(); 9609 break; 9610 } 9611 } 9612 9613 MVT VecTy = N->getValueType(0).getSimpleVT(); 9614 SDValue LoadOps[] = { Chain, Base }; 9615 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 9616 DAG.getVTList(VecTy, MVT::Other), 9617 LoadOps, VecTy, MMO); 9618 DCI.AddToWorklist(Load.getNode()); 9619 Chain = Load.getValue(1); 9620 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 9621 DAG.getVTList(VecTy, MVT::Other), Chain, Load); 9622 DCI.AddToWorklist(Swap.getNode()); 9623 return Swap; 9624 } 9625 9626 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 9627 // builtins) into stores with swaps. 9628 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 9629 DAGCombinerInfo &DCI) const { 9630 SelectionDAG &DAG = DCI.DAG; 9631 SDLoc dl(N); 9632 SDValue Chain; 9633 SDValue Base; 9634 unsigned SrcOpnd; 9635 MachineMemOperand *MMO; 9636 9637 switch (N->getOpcode()) { 9638 default: 9639 llvm_unreachable("Unexpected opcode for little endian VSX store"); 9640 case ISD::STORE: { 9641 StoreSDNode *ST = cast<StoreSDNode>(N); 9642 Chain = ST->getChain(); 9643 Base = ST->getBasePtr(); 9644 MMO = ST->getMemOperand(); 9645 SrcOpnd = 1; 9646 // If the MMO suggests this isn't a store of a full vector, leave 9647 // things alone. For a built-in, we have to make the change for 9648 // correctness, so if there is a size problem that will be a bug. 9649 if (MMO->getSize() < 16) 9650 return SDValue(); 9651 break; 9652 } 9653 case ISD::INTRINSIC_VOID: { 9654 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 9655 Chain = Intrin->getChain(); 9656 // Intrin->getBasePtr() oddly does not get what we want. 
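// (For these store intrinsics the operands are chain, intrinsic id, value,
// pointer, so the pointer is operand 3 and the stored value is operand 2.)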
9657 Base = Intrin->getOperand(3); 9658 MMO = Intrin->getMemOperand(); 9659 SrcOpnd = 2; 9660 break; 9661 } 9662 } 9663 9664 SDValue Src = N->getOperand(SrcOpnd); 9665 MVT VecTy = Src.getValueType().getSimpleVT(); 9666 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 9667 DAG.getVTList(VecTy, MVT::Other), Chain, Src); 9668 DCI.AddToWorklist(Swap.getNode()); 9669 Chain = Swap.getValue(1); 9670 SDValue StoreOps[] = { Chain, Swap, Base }; 9671 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 9672 DAG.getVTList(MVT::Other), 9673 StoreOps, VecTy, MMO); 9674 DCI.AddToWorklist(Store.getNode()); 9675 return Store; 9676 } 9677 9678 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 9679 DAGCombinerInfo &DCI) const { 9680 SelectionDAG &DAG = DCI.DAG; 9681 SDLoc dl(N); 9682 switch (N->getOpcode()) { 9683 default: break; 9684 case PPCISD::SHL: 9685 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 9686 if (C->isNullValue()) // 0 << V -> 0. 9687 return N->getOperand(0); 9688 } 9689 break; 9690 case PPCISD::SRL: 9691 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 9692 if (C->isNullValue()) // 0 >>u V -> 0. 9693 return N->getOperand(0); 9694 } 9695 break; 9696 case PPCISD::SRA: 9697 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 9698 if (C->isNullValue() || // 0 >>s V -> 0. 9699 C->isAllOnesValue()) // -1 >>s V -> -1. 9700 return N->getOperand(0); 9701 } 9702 break; 9703 case ISD::SIGN_EXTEND: 9704 case ISD::ZERO_EXTEND: 9705 case ISD::ANY_EXTEND: 9706 return DAGCombineExtBoolTrunc(N, DCI); 9707 case ISD::TRUNCATE: 9708 case ISD::SETCC: 9709 case ISD::SELECT_CC: 9710 return DAGCombineTruncBoolExt(N, DCI); 9711 case ISD::SINT_TO_FP: 9712 case ISD::UINT_TO_FP: 9713 return combineFPToIntToFP(N, DCI); 9714 case ISD::STORE: { 9715 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 9716 if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() && 9717 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 9718 N->getOperand(1).getValueType() == MVT::i32 && 9719 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 9720 SDValue Val = N->getOperand(1).getOperand(0); 9721 if (Val.getValueType() == MVT::f32) { 9722 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 9723 DCI.AddToWorklist(Val.getNode()); 9724 } 9725 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 9726 DCI.AddToWorklist(Val.getNode()); 9727 9728 SDValue Ops[] = { 9729 N->getOperand(0), Val, N->getOperand(2), 9730 DAG.getValueType(N->getOperand(1).getValueType()) 9731 }; 9732 9733 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 9734 DAG.getVTList(MVT::Other), Ops, 9735 cast<StoreSDNode>(N)->getMemoryVT(), 9736 cast<StoreSDNode>(N)->getMemOperand()); 9737 DCI.AddToWorklist(Val.getNode()); 9738 return Val; 9739 } 9740 9741 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 9742 if (cast<StoreSDNode>(N)->isUnindexed() && 9743 N->getOperand(1).getOpcode() == ISD::BSWAP && 9744 N->getOperand(1).getNode()->hasOneUse() && 9745 (N->getOperand(1).getValueType() == MVT::i32 || 9746 N->getOperand(1).getValueType() == MVT::i16 || 9747 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 9748 N->getOperand(1).getValueType() == MVT::i64))) { 9749 SDValue BSwapOp = N->getOperand(1).getOperand(0); 9750 // Do an any-extend to 32-bits if this is a half-word input. 
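// (The byte-reversing halfword store only writes the low 16 bits, so the
// contents of the extended upper bits are irrelevant.)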
9751 if (BSwapOp.getValueType() == MVT::i16) 9752 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 9753 9754 SDValue Ops[] = { 9755 N->getOperand(0), BSwapOp, N->getOperand(2), 9756 DAG.getValueType(N->getOperand(1).getValueType()) 9757 }; 9758 return 9759 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 9760 Ops, cast<StoreSDNode>(N)->getMemoryVT(), 9761 cast<StoreSDNode>(N)->getMemOperand()); 9762 } 9763 9764 // For little endian, VSX stores require generating xxswapd/stxvd2x. 9765 EVT VT = N->getOperand(1).getValueType(); 9766 if (VT.isSimple()) { 9767 MVT StoreVT = VT.getSimpleVT(); 9768 if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && 9769 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || 9770 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) 9771 return expandVSXStoreForLE(N, DCI); 9772 } 9773 break; 9774 } 9775 case ISD::LOAD: { 9776 LoadSDNode *LD = cast<LoadSDNode>(N); 9777 EVT VT = LD->getValueType(0); 9778 9779 // For little endian, VSX loads require generating lxvd2x/xxswapd. 9780 if (VT.isSimple()) { 9781 MVT LoadVT = VT.getSimpleVT(); 9782 if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && 9783 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || 9784 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) 9785 return expandVSXLoadForLE(N, DCI); 9786 } 9787 9788 EVT MemVT = LD->getMemoryVT(); 9789 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); 9790 unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); 9791 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); 9792 unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy); 9793 if (LD->isUnindexed() && VT.isVector() && 9794 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && 9795 // P8 and later hardware should just use LOAD. 9796 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || 9797 VT == MVT::v4i32 || VT == MVT::v4f32)) || 9798 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && 9799 LD->getAlignment() >= ScalarABIAlignment)) && 9800 LD->getAlignment() < ABIAlignment) { 9801 // This is a type-legal unaligned Altivec or QPX load. 9802 SDValue Chain = LD->getChain(); 9803 SDValue Ptr = LD->getBasePtr(); 9804 bool isLittleEndian = Subtarget.isLittleEndian(); 9805 9806 // This implements the loading of unaligned vectors as described in 9807 // the venerable Apple Velocity Engine overview. Specifically: 9808 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 9809 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 9810 // 9811 // The general idea is to expand a sequence of one or more unaligned 9812 // loads into an alignment-based permutation-control instruction (lvsl 9813 // or lvsr), a series of regular vector loads (which always truncate 9814 // their input address to an aligned address), and a series of 9815 // permutations. The results of these permutations are the requested 9816 // loaded values. The trick is that the last "extra" load is not taken 9817 // from the address you might suspect (sizeof(vector) bytes after the 9818 // last requested load), but rather sizeof(vector) - 1 bytes after the 9819 // last requested vector. The point of this is to avoid a page fault if 9820 // the base address happened to be aligned. This works because if the 9821 // base address is aligned, then adding less than a full vector length 9822 // will cause the last vector in the sequence to be (re)loaded.
9823 // Otherwise, the next vector will be fetched as you might suspect was 9824 // necessary. 9825 9826 // We might be able to reuse the permutation generation from 9827 // a different base address offset from this one by an aligned amount. 9828 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 9829 // optimization later. 9830 Intrinsic::ID Intr, IntrLD, IntrPerm; 9831 MVT PermCntlTy, PermTy, LDTy; 9832 if (Subtarget.hasAltivec()) { 9833 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : 9834 Intrinsic::ppc_altivec_lvsl; 9835 IntrLD = Intrinsic::ppc_altivec_lvx; 9836 IntrPerm = Intrinsic::ppc_altivec_vperm; 9837 PermCntlTy = MVT::v16i8; 9838 PermTy = MVT::v4i32; 9839 LDTy = MVT::v4i32; 9840 } else { 9841 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : 9842 Intrinsic::ppc_qpx_qvlpcls; 9843 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : 9844 Intrinsic::ppc_qpx_qvlfs; 9845 IntrPerm = Intrinsic::ppc_qpx_qvfperm; 9846 PermCntlTy = MVT::v4f64; 9847 PermTy = MVT::v4f64; 9848 LDTy = MemVT.getSimpleVT(); 9849 } 9850 9851 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); 9852 9853 // Create the new MMO for the new base load. It is like the original MMO, 9854 // but represents an area in memory almost twice the vector size centered 9855 // on the original address. If the address is unaligned, we might start 9856 // reading up to (sizeof(vector)-1) bytes below the address of the 9857 // original unaligned load. 9858 MachineFunction &MF = DAG.getMachineFunction(); 9859 MachineMemOperand *BaseMMO = 9860 MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1, 9861 2*MemVT.getStoreSize()-1); 9862 9863 // Create the new base load. 9864 SDValue LDXIntID = DAG.getTargetConstant(IntrLD, getPointerTy()); 9865 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; 9866 SDValue BaseLoad = 9867 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 9868 DAG.getVTList(PermTy, MVT::Other), 9869 BaseLoadOps, LDTy, BaseMMO); 9870 9871 // Note that the value of IncOffset (which is provided to the next 9872 // load's pointer info offset value, and thus used to calculate the 9873 // alignment), and the value of IncValue (which is actually used to 9874 // increment the pointer value) are different! This is because we 9875 // require the next load to appear to be aligned, even though it 9876 // is actually offset from the base pointer by a lesser amount. 9877 int IncOffset = VT.getSizeInBits() / 8; 9878 int IncValue = IncOffset; 9879 9880 // Walk (both up and down) the chain looking for another load at the real 9881 // (aligned) offset (the alignment of the other load does not matter in 9882 // this case). If found, then do not use the offset reduction trick, as 9883 // that will prevent the loads from being later combined (as they would 9884 // otherwise be duplicates). 
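// For a 16-byte vector this means IncOffset stays 16 while IncValue becomes 15
// when no neighboring load is found, which is the sizeof(vector) - 1 trick
// described above.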
9885 if (!findConsecutiveLoad(LD, DAG)) 9886 --IncValue; 9887 9888 SDValue Increment = DAG.getConstant(IncValue, getPointerTy()); 9889 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 9890 9891 MachineMemOperand *ExtraMMO = 9892 MF.getMachineMemOperand(LD->getMemOperand(), 9893 1, 2*MemVT.getStoreSize()-1); 9894 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; 9895 SDValue ExtraLoad = 9896 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 9897 DAG.getVTList(PermTy, MVT::Other), 9898 ExtraLoadOps, LDTy, ExtraMMO); 9899 9900 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 9901 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 9902 9903 // Because vperm has a big-endian bias, we must reverse the order 9904 // of the input vectors and complement the permute control vector 9905 // when generating little endian code. We have already handled the 9906 // latter by using lvsr instead of lvsl, so just reverse BaseLoad 9907 // and ExtraLoad here. 9908 SDValue Perm; 9909 if (isLittleEndian) 9910 Perm = BuildIntrinsicOp(IntrPerm, 9911 ExtraLoad, BaseLoad, PermCntl, DAG, dl); 9912 else 9913 Perm = BuildIntrinsicOp(IntrPerm, 9914 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 9915 9916 if (VT != PermTy) 9917 Perm = Subtarget.hasAltivec() ? 9918 DAG.getNode(ISD::BITCAST, dl, VT, Perm) : 9919 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX 9920 DAG.getTargetConstant(1, MVT::i64)); 9921 // second argument is 1 because this rounding 9922 // is always exact. 9923 9924 // The output of the permutation is our loaded result, the TokenFactor is 9925 // our new chain. 9926 DCI.CombineTo(N, Perm, TF); 9927 return SDValue(N, 0); 9928 } 9929 } 9930 break; 9931 case ISD::INTRINSIC_WO_CHAIN: { 9932 bool isLittleEndian = Subtarget.isLittleEndian(); 9933 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 9934 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr 9935 : Intrinsic::ppc_altivec_lvsl); 9936 if ((IID == Intr || 9937 IID == Intrinsic::ppc_qpx_qvlpcld || 9938 IID == Intrinsic::ppc_qpx_qvlpcls) && 9939 N->getOperand(1)->getOpcode() == ISD::ADD) { 9940 SDValue Add = N->getOperand(1); 9941 9942 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 9943 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; 9944 9945 if (DAG.MaskedValueIsZero( 9946 Add->getOperand(1), 9947 APInt::getAllOnesValue(Bits /* alignment */) 9948 .zext( 9949 Add.getValueType().getScalarType().getSizeInBits()))) { 9950 SDNode *BasePtr = Add->getOperand(0).getNode(); 9951 for (SDNode::use_iterator UI = BasePtr->use_begin(), 9952 UE = BasePtr->use_end(); 9953 UI != UE; ++UI) { 9954 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 9955 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) { 9956 // We've found another LVSL/LVSR, and this address is an aligned 9957 // multiple of that one. The results will be the same, so use the 9958 // one we've just found instead. 
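// (lvsl/lvsr depend only on the low four bits of the address, and the QPX
// permute-control intrinsics only on the low five, so adding an offset that is
// a multiple of the alignment cannot change the computed control vector.)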
9959 9960 return SDValue(*UI, 0); 9961 } 9962 } 9963 } 9964 9965 if (isa<ConstantSDNode>(Add->getOperand(1))) { 9966 SDNode *BasePtr = Add->getOperand(0).getNode(); 9967 for (SDNode::use_iterator UI = BasePtr->use_begin(), 9968 UE = BasePtr->use_end(); UI != UE; ++UI) { 9969 if (UI->getOpcode() == ISD::ADD && 9970 isa<ConstantSDNode>(UI->getOperand(1)) && 9971 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - 9972 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) % 9973 (1ULL << Bits) == 0) { 9974 SDNode *OtherAdd = *UI; 9975 for (SDNode::use_iterator VI = OtherAdd->use_begin(), 9976 VE = OtherAdd->use_end(); VI != VE; ++VI) { 9977 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 9978 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) { 9979 return SDValue(*VI, 0); 9980 } 9981 } 9982 } 9983 } 9984 } 9985 } 9986 } 9987 9988 break; 9989 case ISD::INTRINSIC_W_CHAIN: { 9990 // For little endian, VSX loads require generating lxvd2x/xxswapd. 9991 if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { 9992 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9993 default: 9994 break; 9995 case Intrinsic::ppc_vsx_lxvw4x: 9996 case Intrinsic::ppc_vsx_lxvd2x: 9997 return expandVSXLoadForLE(N, DCI); 9998 } 9999 } 10000 break; 10001 } 10002 case ISD::INTRINSIC_VOID: { 10003 // For little endian, VSX stores require generating xxswapd/stxvd2x. 10004 if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { 10005 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 10006 default: 10007 break; 10008 case Intrinsic::ppc_vsx_stxvw4x: 10009 case Intrinsic::ppc_vsx_stxvd2x: 10010 return expandVSXStoreForLE(N, DCI); 10011 } 10012 } 10013 break; 10014 } 10015 case ISD::BSWAP: 10016 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 10017 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 10018 N->getOperand(0).hasOneUse() && 10019 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 10020 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 10021 N->getValueType(0) == MVT::i64))) { 10022 SDValue Load = N->getOperand(0); 10023 LoadSDNode *LD = cast<LoadSDNode>(Load); 10024 // Create the byte-swapping load. 10025 SDValue Ops[] = { 10026 LD->getChain(), // Chain 10027 LD->getBasePtr(), // Ptr 10028 DAG.getValueType(N->getValueType(0)) // VT 10029 }; 10030 SDValue BSLoad = 10031 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 10032 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 10033 MVT::i64 : MVT::i32, MVT::Other), 10034 Ops, LD->getMemoryVT(), LD->getMemOperand()); 10035 10036 // If this is an i16 load, insert the truncate. 10037 SDValue ResVal = BSLoad; 10038 if (N->getValueType(0) == MVT::i16) 10039 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 10040 10041 // First, combine the bswap away. This makes the value produced by the 10042 // load dead. 10043 DCI.CombineTo(N, ResVal); 10044 10045 // Next, combine the load away, we give it a bogus result value but a real 10046 // chain result. The result value is dead because the bswap is dead. 10047 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 10048 10049 // Return N so it doesn't get rechecked! 10050 return SDValue(N, 0); 10051 } 10052 10053 break; 10054 case PPCISD::VCMP: { 10055 // If a VCMPo node already exists with exactly the same operands as this 10056 // node, use its result instead of this node (VCMPo computes both a CR6 and 10057 // a normal output). 
10058 // 10059 if (!N->getOperand(0).hasOneUse() && 10060 !N->getOperand(1).hasOneUse() && 10061 !N->getOperand(2).hasOneUse()) { 10062 10063 // Scan all of the users of the LHS, looking for VCMPo's that match. 10064 SDNode *VCMPoNode = nullptr; 10065 10066 SDNode *LHSN = N->getOperand(0).getNode(); 10067 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 10068 UI != E; ++UI) 10069 if (UI->getOpcode() == PPCISD::VCMPo && 10070 UI->getOperand(1) == N->getOperand(1) && 10071 UI->getOperand(2) == N->getOperand(2) && 10072 UI->getOperand(0) == N->getOperand(0)) { 10073 VCMPoNode = *UI; 10074 break; 10075 } 10076 10077 // If there is no VCMPo node, or if the flag value has a single use, don't 10078 // transform this. 10079 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 10080 break; 10081 10082 // Look at the (necessarily single) use of the flag value. If it has a 10083 // chain, this transformation is more complex. Note that multiple things 10084 // could use the value result, which we should ignore. 10085 SDNode *FlagUser = nullptr; 10086 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 10087 FlagUser == nullptr; ++UI) { 10088 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 10089 SDNode *User = *UI; 10090 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 10091 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 10092 FlagUser = User; 10093 break; 10094 } 10095 } 10096 } 10097 10098 // If the user is a MFOCRF instruction, we know this is safe. 10099 // Otherwise we give up for right now. 10100 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 10101 return SDValue(VCMPoNode, 0); 10102 } 10103 break; 10104 } 10105 case ISD::BRCOND: { 10106 SDValue Cond = N->getOperand(1); 10107 SDValue Target = N->getOperand(2); 10108 10109 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 10110 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 10111 Intrinsic::ppc_is_decremented_ctr_nonzero) { 10112 10113 // We now need to make the intrinsic dead (it cannot be instruction 10114 // selected). 10115 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 10116 assert(Cond.getNode()->hasOneUse() && 10117 "Counter decrement has more than one use"); 10118 10119 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 10120 N->getOperand(0), Target); 10121 } 10122 } 10123 break; 10124 case ISD::BR_CC: { 10125 // If this is a branch on an altivec predicate comparison, lower this so 10126 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 10127 // lowering is done pre-legalize, because the legalizer lowers the predicate 10128 // compare down to code that is difficult to reassemble. 10129 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 10130 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 10131 10132 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 10133 // value. If so, pass-through the AND to get to the intrinsic. 
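// (The i1 intrinsic result is promoted and then typically masked with a
// constant such as 1; looking through the AND here lets the code below match
// the intrinsic directly.)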
10134 if (LHS.getOpcode() == ISD::AND && 10135 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 10136 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 10137 Intrinsic::ppc_is_decremented_ctr_nonzero && 10138 isa<ConstantSDNode>(LHS.getOperand(1)) && 10139 !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()-> 10140 isZero()) 10141 LHS = LHS.getOperand(0); 10142 10143 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 10144 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 10145 Intrinsic::ppc_is_decremented_ctr_nonzero && 10146 isa<ConstantSDNode>(RHS)) { 10147 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 10148 "Counter decrement comparison is not EQ or NE"); 10149 10150 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 10151 bool isBDNZ = (CC == ISD::SETEQ && Val) || 10152 (CC == ISD::SETNE && !Val); 10153 10154 // We now need to make the intrinsic dead (it cannot be instruction 10155 // selected). 10156 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 10157 assert(LHS.getNode()->hasOneUse() && 10158 "Counter decrement has more than one use"); 10159 10160 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 10161 N->getOperand(0), N->getOperand(4)); 10162 } 10163 10164 int CompareOpc; 10165 bool isDot; 10166 10167 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 10168 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 10169 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 10170 assert(isDot && "Can't compare against a vector result!"); 10171 10172 // If this is a comparison against something other than 0/1, then we know 10173 // that the condition is never/always true. 10174 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 10175 if (Val != 0 && Val != 1) { 10176 if (CC == ISD::SETEQ) // Cond never true, remove branch. 10177 return N->getOperand(0); 10178 // Always !=, turn it into an unconditional branch. 10179 return DAG.getNode(ISD::BR, dl, MVT::Other, 10180 N->getOperand(0), N->getOperand(4)); 10181 } 10182 10183 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 10184 10185 // Create the PPCISD altivec 'dot' comparison node. 10186 SDValue Ops[] = { 10187 LHS.getOperand(2), // LHS of compare 10188 LHS.getOperand(3), // RHS of compare 10189 DAG.getConstant(CompareOpc, MVT::i32) 10190 }; 10191 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 10192 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 10193 10194 // Unpack the result based on how the target uses it. 10195 PPC::Predicate CompOpc; 10196 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 10197 default: // Can't happen, don't crash on invalid number though. 10198 case 0: // Branch on the value of the EQ bit of CR6. 10199 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 10200 break; 10201 case 1: // Branch on the inverted value of the EQ bit of CR6. 10202 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 10203 break; 10204 case 2: // Branch on the value of the LT bit of CR6. 10205 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 10206 break; 10207 case 3: // Branch on the inverted value of the LT bit of CR6. 10208 CompOpc = BranchOnWhenPredTrue ? 
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  if (Created)
    Created->push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
    if (Created)
      Created->push_back(Op.getNode());
  }

  return Op;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      APInt &KnownZero,
                                                      APInt &KnownOne,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8: {
    if (!ML)
      break;

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
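    // Worked example (assuming 4-byte instructions and that this hook reports
    // the log2 of the requested alignment): a 6-instruction loop is 24 bytes,
    // which satisfies the 16 < LoopSize <= 32 test below, so we return 5,
    // i.e. request 2^5 = 32-byte alignment.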
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
        LoopSize += TII->GetInstSizeInBytes(J);

    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
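  // Illustrative example (hypothetical operand): for something like
  //   asm("fadd %0, %1, %2" : "=f"(r) : "f"(a), "f"(b))
  // with float operands, the 'f' case below yields CW_Register, while the
  // "wc" constraint only scores CW_Register for an i1 operand.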
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                const std::string &Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    case 'f':
      if (VT == MVT::f32 || VT == MVT::i32)
        return std::make_pair(0U, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return std::make_pair(0U, &PPC::F8RCRegClass);
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      return std::make_pair(0U, &PPC::VRRCRegClass);
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc") { // an individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf") {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if (Constraint == "ws") {
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
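  // For example (hypothetical request): "{r3}" with VT == MVT::i64 on a PPC64
  // target resolves to the 32-bit R3 above; the code below replaces it with
  // its 64-bit super-register X3.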
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}


/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              Type *Ty) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
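  // Note that the check below accepts any offset strictly between -2^16 and
  // 2^16-1, which is looser than the [-32768, 32767] range of a true signed
  // 16-bit displacement field.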
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing.
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(),
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                   FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
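// This hook backs the llvm.read_register / llvm.write_register intrinsics
// (used, for example, for named register variables such as
//   register unsigned long SP asm("r1");
// in source code); only r1, r2, and r13 are recognized below, and r2/r13 are
// rejected on configurations where the ABI reserves them.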
unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
                                              EVT VT) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                  (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name for global variable");
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {

  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    // The offset/size pair conservatively covers the enclosing region because
    // some of these loads (the Altivec lv* family) ignore the low-order
    // address bits and access the containing aligned block.
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}

/// getOptimalMemOpType - Returns the target-specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero, there is no need to check it
/// against an alignment requirement, probably because the source does not
/// need to be loaded. If 'IsMemset' is true, this is expanding a memset.
/// If 'ZeroMemset' is true, it is a memset of zero. 'MemcpyStrSrc' indicates
/// whether the memcpy source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  const Function *F = MF.getFunction();
  // When expanding a memset, require at least two QPX instructions to cover
  // the cost of loading the value to be stored from the constant pool.
  if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
      (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
      !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
    return MVT::v4f64;
  }

  // We should use Altivec/VSX loads and stores when available. For unaligned
  // addresses, unaligned VSX loads are only fast starting with the P8.
  if (Subtarget.hasAltivec() && Size >= 16 &&
      (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
       ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
    return MVT::v4i32;

  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0 || BitSize > 64)
    return false;
  return true;
}

bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return true;
}

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return false;

  if (Subtarget.hasQPX()) {
    if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
      return true;
  }

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}