//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco", cl::init(true),
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
    addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  }

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
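  // With these expanded as well, SREM/UREM are lowered to an explicit
  // divide, multiply, and subtract sequence.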
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
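      // Marking everything else Expand lets legalization scalarize these
      // operations or turn them into library calls.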
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD, MVT::v4f64, Custom);
    setOperationAction(ISD::STORE, MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG, MVT::v4f64, Legal);
    setOperationAction(ISD::FABS, MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN, MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

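    // The v4f32 configuration below mirrors the v4f64 setup above.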
    setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
    setOperationAction(ISD::STORE, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND, MVT::v4i1, Legal);
    setOperationAction(ISD::OR, MVT::v4i1, Legal);
    setOperationAction(ISD::XOR, MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
    setOperationAction(ISD::STORE, MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // Everything else is passed on an 8-byte boundary on PPC64 and a
  // 4-byte boundary on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB: return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
  case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT: return "PPCISD::QBFLT";
  case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
1277 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 1278 unsigned ShuffleKind, SelectionDAG &DAG) { 1279 if (DAG.getDataLayout().isLittleEndian()) { 1280 if (ShuffleKind == 1) // unary 1281 return isVMerge(N, UnitSize, 0, 0); 1282 else if (ShuffleKind == 2) // swapped 1283 return isVMerge(N, UnitSize, 0, 16); 1284 else 1285 return false; 1286 } else { 1287 if (ShuffleKind == 1) // unary 1288 return isVMerge(N, UnitSize, 8, 8); 1289 else if (ShuffleKind == 0) // normal 1290 return isVMerge(N, UnitSize, 8, 24); 1291 else 1292 return false; 1293 } 1294 } 1295 1296 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 1297 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). 1298 /// The ShuffleKind distinguishes between big-endian merges with two 1299 /// different inputs (0), either-endian merges with two identical inputs (1), 1300 /// and little-endian merges with two different inputs (2). For the latter, 1301 /// the input operands are swapped (see PPCInstrAltivec.td). 1302 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 1303 unsigned ShuffleKind, SelectionDAG &DAG) { 1304 if (DAG.getDataLayout().isLittleEndian()) { 1305 if (ShuffleKind == 1) // unary 1306 return isVMerge(N, UnitSize, 8, 8); 1307 else if (ShuffleKind == 2) // swapped 1308 return isVMerge(N, UnitSize, 8, 24); 1309 else 1310 return false; 1311 } else { 1312 if (ShuffleKind == 1) // unary 1313 return isVMerge(N, UnitSize, 0, 0); 1314 else if (ShuffleKind == 0) // normal 1315 return isVMerge(N, UnitSize, 0, 16); 1316 else 1317 return false; 1318 } 1319 } 1320 1321 /** 1322 * \brief Common function used to match vmrgew and vmrgow shuffles 1323 * 1324 * The indexOffset determines whether to look for even or odd words in 1325 * the shuffle mask. This is based on the of the endianness of the target 1326 * machine. 1327 * - Little Endian: 1328 * - Use offset of 0 to check for odd elements 1329 * - Use offset of 4 to check for even elements 1330 * - Big Endian: 1331 * - Use offset of 0 to check for even elements 1332 * - Use offset of 4 to check for odd elements 1333 * A detailed description of the vector element ordering for little endian and 1334 * big endian can be found at 1335 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html 1336 * Targeting your applications - what little endian and big endian IBM XL C/C++ 1337 * compiler differences mean to you 1338 * 1339 * The mask to the shuffle vector instruction specifies the indices of the 1340 * elements from the two input vectors to place in the result. The elements are 1341 * numbered in array-access order, starting with the first vector. These vectors 1342 * are always of type v16i8, thus each vector will contain 16 elements of size 1343 * 8. More info on the shuffle vector can be found in the 1344 * http://llvm.org/docs/LangRef.html#shufflevector-instruction 1345 * Language Reference. 1346 * 1347 * The RHSStartValue indicates whether the same input vectors are used (unary) 1348 * or two different input vectors are used, based on the following: 1349 * - If the instruction uses the same vector for both inputs, the range of the 1350 * indices will be 0 to 15. In this case, the RHSStart value passed should 1351 * be 0. 1352 * - If the instruction has two different vectors then the range of the 1353 * indices will be 0 to 31. 
In this case, the RHSStart value passed should 1354 * be 16 (indices 0-15 specify elements in the first vector while indices 16 1355 * to 31 specify elements in the second vector). 1356 * 1357 * \param[in] N The shuffle vector SD Node to analyze 1358 * \param[in] IndexOffset Specifies whether to look for even or odd elements 1359 * \param[in] RHSStartValue Specifies the starting index for the righthand input 1360 * vector to the shuffle_vector instruction 1361 * \return true iff this shuffle vector represents an even or odd word merge 1362 */ 1363 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, 1364 unsigned RHSStartValue) { 1365 if (N->getValueType(0) != MVT::v16i8) 1366 return false; 1367 1368 for (unsigned i = 0; i < 2; ++i) 1369 for (unsigned j = 0; j < 4; ++j) 1370 if (!isConstantOrUndef(N->getMaskElt(i*4+j), 1371 i*RHSStartValue+j+IndexOffset) || 1372 !isConstantOrUndef(N->getMaskElt(i*4+j+8), 1373 i*RHSStartValue+j+IndexOffset+8)) 1374 return false; 1375 return true; 1376 } 1377 1378 /** 1379 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or 1380 * vmrgow instructions. 1381 * 1382 * \param[in] N The shuffle vector SD Node to analyze 1383 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) 1384 * \param[in] ShuffleKind Identify the type of merge: 1385 * - 0 = big-endian merge with two different inputs; 1386 * - 1 = either-endian merge with two identical inputs; 1387 * - 2 = little-endian merge with two different inputs (inputs are swapped for 1388 * little-endian merges). 1389 * \param[in] DAG The current SelectionDAG 1390 * \return true iff this shuffle mask 1391 */ 1392 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, 1393 unsigned ShuffleKind, SelectionDAG &DAG) { 1394 if (DAG.getDataLayout().isLittleEndian()) { 1395 unsigned indexOffset = CheckEven ? 4 : 0; 1396 if (ShuffleKind == 1) // Unary 1397 return isVMerge(N, indexOffset, 0); 1398 else if (ShuffleKind == 2) // swapped 1399 return isVMerge(N, indexOffset, 16); 1400 else 1401 return false; 1402 } 1403 else { 1404 unsigned indexOffset = CheckEven ? 0 : 4; 1405 if (ShuffleKind == 1) // Unary 1406 return isVMerge(N, indexOffset, 0); 1407 else if (ShuffleKind == 0) // Normal 1408 return isVMerge(N, indexOffset, 16); 1409 else 1410 return false; 1411 } 1412 return false; 1413 } 1414 1415 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 1416 /// amount, otherwise return -1. 1417 /// The ShuffleKind distinguishes between big-endian operations with two 1418 /// different inputs (0), either-endian operations with two identical inputs 1419 /// (1), and little-endian operations with two different inputs (2). For the 1420 /// latter, the input operands are swapped (see PPCInstrAltivec.td). 1421 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, 1422 SelectionDAG &DAG) { 1423 if (N->getValueType(0) != MVT::v16i8) 1424 return -1; 1425 1426 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1427 1428 // Find the first non-undef value in the shuffle mask. 1429 unsigned i; 1430 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) 1431 /*search*/; 1432 1433 if (i == 16) return -1; // all undef. 1434 1435 // Otherwise, check to see if the rest of the elements are consecutively 1436 // numbered from this value. 
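// Worked example (illustrative): the mask <3,4,5, ... ,17,18> (sixteen
// consecutive values starting at 3) matches a big-endian two-input vsldoi
// (ShuffleKind 0) with shift amount 3; on little-endian targets
// (ShuffleKind 2, operands swapped) the same mask is reported as 16 - 3 = 13.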
1437 unsigned ShiftAmt = SVOp->getMaskElt(i); 1438 if (ShiftAmt < i) return -1; 1439 1440 ShiftAmt -= i; 1441 bool isLE = DAG.getDataLayout().isLittleEndian(); 1442 1443 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { 1444 // Check the rest of the elements to see if they are consecutive. 1445 for (++i; i != 16; ++i) 1446 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1447 return -1; 1448 } else if (ShuffleKind == 1) { 1449 // Check the rest of the elements to see if they are consecutive. 1450 for (++i; i != 16; ++i) 1451 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 1452 return -1; 1453 } else 1454 return -1; 1455 1456 if (isLE) 1457 ShiftAmt = 16 - ShiftAmt; 1458 1459 return ShiftAmt; 1460 } 1461 1462 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 1463 /// specifies a splat of a single element that is suitable for input to 1464 /// VSPLTB/VSPLTH/VSPLTW. 1465 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 1466 assert(N->getValueType(0) == MVT::v16i8 && 1467 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 1468 1469 // The consecutive indices need to specify an element, not part of two 1470 // different elements. So abandon ship early if this isn't the case. 1471 if (N->getMaskElt(0) % EltSize != 0) 1472 return false; 1473 1474 // This is a splat operation if each element of the permute is the same, and 1475 // if the value doesn't reference the second vector. 1476 unsigned ElementBase = N->getMaskElt(0); 1477 1478 // FIXME: Handle UNDEF elements too! 1479 if (ElementBase >= 16) 1480 return false; 1481 1482 // Check that the indices are consecutive, in the case of a multi-byte element 1483 // splatted with a v16i8 mask. 1484 for (unsigned i = 1; i != EltSize; ++i) 1485 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 1486 return false; 1487 1488 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 1489 if (N->getMaskElt(i) < 0) continue; 1490 for (unsigned j = 0; j != EltSize; ++j) 1491 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 1492 return false; 1493 } 1494 return true; 1495 } 1496 1497 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 1498 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 1499 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, 1500 SelectionDAG &DAG) { 1501 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1502 assert(isSplatShuffleMask(SVOp, EltSize)); 1503 if (DAG.getDataLayout().isLittleEndian()) 1504 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); 1505 else 1506 return SVOp->getMaskElt(0) / EltSize; 1507 } 1508 1509 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 1510 /// by using a vspltis[bhw] instruction of the specified element size, return 1511 /// the constant being splatted. The ByteSize field indicates the number of 1512 /// bytes of each element [124] -> [bhw]. 1513 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 1514 SDValue OpVal(nullptr, 0); 1515 1516 // If ByteSize of the splat is bigger than the element size of the 1517 // build_vector, then we have a case where we are checking for a splat where 1518 // multiple elements of the buildvector are folded together into a single 1519 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 1520 unsigned EltSize = 16/N->getNumOperands(); 1521 if (EltSize < ByteSize) { 1522 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 
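// e.g. when checking ByteSize == 2 against a v16i8 build_vector, EltSize is 1
// and Multiple is 2: the loop below then requires all even-numbered operands
// to match one another and all odd-numbered operands to match one another
// before the chunks are considered for a wider vspltis* (the "{0,1}*8" case
// mentioned above).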
1523 SDValue UniquedVals[4]; 1524 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 1525 1526 // See if all of the elements in the buildvector agree across. 1527 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1528 if (N->getOperand(i).isUndef()) continue; 1529 // If the element isn't a constant, bail fully out. 1530 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 1531 1532 1533 if (!UniquedVals[i&(Multiple-1)].getNode()) 1534 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 1535 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 1536 return SDValue(); // no match. 1537 } 1538 1539 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 1540 // either constant or undef values that are identical for each chunk. See 1541 // if these chunks can form into a larger vspltis*. 1542 1543 // Check to see if all of the leading entries are either 0 or -1. If 1544 // neither, then this won't fit into the immediate field. 1545 bool LeadingZero = true; 1546 bool LeadingOnes = true; 1547 for (unsigned i = 0; i != Multiple-1; ++i) { 1548 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 1549 1550 LeadingZero &= isNullConstant(UniquedVals[i]); 1551 LeadingOnes &= isAllOnesConstant(UniquedVals[i]); 1552 } 1553 // Finally, check the least significant entry. 1554 if (LeadingZero) { 1555 if (!UniquedVals[Multiple-1].getNode()) 1556 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef 1557 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 1558 if (Val < 16) // 0,0,0,4 -> vspltisw(4) 1559 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1560 } 1561 if (LeadingOnes) { 1562 if (!UniquedVals[Multiple-1].getNode()) 1563 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef 1564 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 1565 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 1566 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1567 } 1568 1569 return SDValue(); 1570 } 1571 1572 // Check to see if this buildvec has a single non-undef value in its elements. 1573 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1574 if (N->getOperand(i).isUndef()) continue; 1575 if (!OpVal.getNode()) 1576 OpVal = N->getOperand(i); 1577 else if (OpVal != N->getOperand(i)) 1578 return SDValue(); 1579 } 1580 1581 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 1582 1583 unsigned ValSizeInBytes = EltSize; 1584 uint64_t Value = 0; 1585 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1586 Value = CN->getZExtValue(); 1587 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1588 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 1589 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 1590 } 1591 1592 // If the splat value is larger than the element value, then we can never do 1593 // this splat. The only case that we could fit the replicated bits into our 1594 // immediate field for would be zero, and we prefer to use vxor for it. 1595 if (ValSizeInBytes < ByteSize) return SDValue(); 1596 1597 // If the element value is larger than the splat value, check if it consists 1598 // of a repeated bit pattern of size ByteSize. 1599 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) 1600 return SDValue(); 1601 1602 // Properly sign extend the value. 
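// Example of the final checks (illustrative): for ByteSize == 2, a splat of
// 0xFFFE sign-extends to -2, which fits the 5-bit signed immediate field and
// can be materialized with vspltish -2; a splat of 0x0040 (64) does not fit
// and falls through to return SDValue().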
1603 int MaskVal = SignExtend32(Value, ByteSize * 8); 1604 1605 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 1606 if (MaskVal == 0) return SDValue(); 1607 1608 // Finally, if this value fits in a 5 bit sext field, return it 1609 if (SignExtend32<5>(MaskVal) == MaskVal) 1610 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); 1611 return SDValue(); 1612 } 1613 1614 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift 1615 /// amount, otherwise return -1. 1616 int PPC::isQVALIGNIShuffleMask(SDNode *N) { 1617 EVT VT = N->getValueType(0); 1618 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) 1619 return -1; 1620 1621 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1622 1623 // Find the first non-undef value in the shuffle mask. 1624 unsigned i; 1625 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) 1626 /*search*/; 1627 1628 if (i == 4) return -1; // all undef. 1629 1630 // Otherwise, check to see if the rest of the elements are consecutively 1631 // numbered from this value. 1632 unsigned ShiftAmt = SVOp->getMaskElt(i); 1633 if (ShiftAmt < i) return -1; 1634 ShiftAmt -= i; 1635 1636 // Check the rest of the elements to see if they are consecutive. 1637 for (++i; i != 4; ++i) 1638 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1639 return -1; 1640 1641 return ShiftAmt; 1642 } 1643 1644 //===----------------------------------------------------------------------===// 1645 // Addressing Mode Selection 1646 //===----------------------------------------------------------------------===// 1647 1648 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit 1649 /// or 64-bit immediate, and if the value can be accurately represented as a 1650 /// sign extension from a 16-bit value. If so, this returns true and the 1651 /// immediate. 1652 static bool isIntS16Immediate(SDNode *N, short &Imm) { 1653 if (!isa<ConstantSDNode>(N)) 1654 return false; 1655 1656 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 1657 if (N->getValueType(0) == MVT::i32) 1658 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 1659 else 1660 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 1661 } 1662 static bool isIntS16Immediate(SDValue Op, short &Imm) { 1663 return isIntS16Immediate(Op.getNode(), Imm); 1664 } 1665 1666 /// SelectAddressRegReg - Given the specified addressed, check to see if it 1667 /// can be represented as an indexed [r+r] operation. Returns false if it 1668 /// can be more efficiently represented with [r+imm]. 1669 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 1670 SDValue &Index, 1671 SelectionDAG &DAG) const { 1672 short imm = 0; 1673 if (N.getOpcode() == ISD::ADD) { 1674 if (isIntS16Immediate(N.getOperand(1), imm)) 1675 return false; // r+i 1676 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 1677 return false; // r+i 1678 1679 Base = N.getOperand(0); 1680 Index = N.getOperand(1); 1681 return true; 1682 } else if (N.getOpcode() == ISD::OR) { 1683 if (isIntS16Immediate(N.getOperand(1), imm)) 1684 return false; // r+i can fold it if we can. 1685 1686 // If this is an or of disjoint bitfields, we can codegen this as an add 1687 // (for better address arithmetic) if the LHS and RHS of the OR are provably 1688 // disjoint. 
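// For instance, (or (shl X, 4), 3) only sets bits that are known zero in the
// shifted LHS, so it computes the same value as (add (shl X, 4), 3); the
// known-bits queries below prove that no carry can occur, allowing the [r+r]
// form.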
1689 APInt LHSKnownZero, LHSKnownOne; 1690 APInt RHSKnownZero, RHSKnownOne; 1691 DAG.computeKnownBits(N.getOperand(0), 1692 LHSKnownZero, LHSKnownOne); 1693 1694 if (LHSKnownZero.getBoolValue()) { 1695 DAG.computeKnownBits(N.getOperand(1), 1696 RHSKnownZero, RHSKnownOne); 1697 // If all of the bits are known zero on the LHS or RHS, the add won't 1698 // carry. 1699 if (~(LHSKnownZero | RHSKnownZero) == 0) { 1700 Base = N.getOperand(0); 1701 Index = N.getOperand(1); 1702 return true; 1703 } 1704 } 1705 } 1706 1707 return false; 1708 } 1709 1710 // If we happen to be doing an i64 load or store into a stack slot that has 1711 // less than a 4-byte alignment, then the frame-index elimination may need to 1712 // use an indexed load or store instruction (because the offset may not be a 1713 // multiple of 4). The extra register needed to hold the offset comes from the 1714 // register scavenger, and it is possible that the scavenger will need to use 1715 // an emergency spill slot. As a result, we need to make sure that a spill slot 1716 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned 1717 // stack slot. 1718 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { 1719 // FIXME: This does not handle the LWA case. 1720 if (VT != MVT::i64) 1721 return; 1722 1723 // NOTE: We'll exclude negative FIs here, which come from argument 1724 // lowering, because there are no known test cases triggering this problem 1725 // using packed structures (or similar). We can remove this exclusion if 1726 // we find such a test case. The reason why this is so test-case driven is 1727 // because this entire 'fixup' is only to prevent crashes (from the 1728 // register scavenger) on not-really-valid inputs. For example, if we have: 1729 // %a = alloca i1 1730 // %b = bitcast i1* %a to i64* 1731 // store i64* a, i64 b 1732 // then the store should really be marked as 'align 1', but is not. If it 1733 // were marked as 'align 1' then the indexed form would have been 1734 // instruction-selected initially, and the problem this 'fixup' is preventing 1735 // won't happen regardless. 1736 if (FrameIdx < 0) 1737 return; 1738 1739 MachineFunction &MF = DAG.getMachineFunction(); 1740 MachineFrameInfo *MFI = MF.getFrameInfo(); 1741 1742 unsigned Align = MFI->getObjectAlignment(FrameIdx); 1743 if (Align >= 4) 1744 return; 1745 1746 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1747 FuncInfo->setHasNonRISpills(); 1748 } 1749 1750 /// Returns true if the address N can be represented by a base register plus 1751 /// a signed 16-bit displacement [r+imm], and if it is not better 1752 /// represented as reg+reg. If Aligned is true, only accept displacements 1753 /// suitable for STD and friends, i.e. multiples of 4. 1754 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 1755 SDValue &Base, 1756 SelectionDAG &DAG, 1757 bool Aligned) const { 1758 // FIXME dl should come from parent load or store, not from address 1759 SDLoc dl(N); 1760 // If this can be more profitably realized as r+r, fail. 
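// e.g. (add %p, 20) is accepted further down as Disp = 20, Base = %p (a
// D-form address), whereas (add %p, %q) is caught by the SelectAddressRegReg
// check that follows and this routine returns false in favour of [r+r].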
1761 if (SelectAddressRegReg(N, Disp, Base, DAG)) 1762 return false; 1763 1764 if (N.getOpcode() == ISD::ADD) { 1765 short imm = 0; 1766 if (isIntS16Immediate(N.getOperand(1), imm) && 1767 (!Aligned || (imm & 3) == 0)) { 1768 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1769 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1770 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1771 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1772 } else { 1773 Base = N.getOperand(0); 1774 } 1775 return true; // [r+i] 1776 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 1777 // Match LOAD (ADD (X, Lo(G))). 1778 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 1779 && "Cannot handle constant offsets yet!"); 1780 Disp = N.getOperand(1).getOperand(0); // The global address. 1781 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 1782 Disp.getOpcode() == ISD::TargetGlobalTLSAddress || 1783 Disp.getOpcode() == ISD::TargetConstantPool || 1784 Disp.getOpcode() == ISD::TargetJumpTable); 1785 Base = N.getOperand(0); 1786 return true; // [&g+r] 1787 } 1788 } else if (N.getOpcode() == ISD::OR) { 1789 short imm = 0; 1790 if (isIntS16Immediate(N.getOperand(1), imm) && 1791 (!Aligned || (imm & 3) == 0)) { 1792 // If this is an or of disjoint bitfields, we can codegen this as an add 1793 // (for better address arithmetic) if the LHS and RHS of the OR are 1794 // provably disjoint. 1795 APInt LHSKnownZero, LHSKnownOne; 1796 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 1797 1798 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 1799 // If all of the bits are known zero on the LHS or RHS, the add won't 1800 // carry. 1801 if (FrameIndexSDNode *FI = 1802 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1803 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1804 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1805 } else { 1806 Base = N.getOperand(0); 1807 } 1808 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1809 return true; 1810 } 1811 } 1812 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1813 // Loading from a constant address. 1814 1815 // If this address fits entirely in a 16-bit sext immediate field, codegen 1816 // this as "d, 0" 1817 short Imm; 1818 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { 1819 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); 1820 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1821 CN->getValueType(0)); 1822 return true; 1823 } 1824 1825 // Handle 32-bit sext immediates with LIS + addr mode. 1826 if ((CN->getValueType(0) == MVT::i32 || 1827 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && 1828 (!Aligned || (CN->getZExtValue() & 3) == 0)) { 1829 int Addr = (int)CN->getZExtValue(); 1830 1831 // Otherwise, break this down into an LIS + disp. 1832 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); 1833 1834 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, 1835 MVT::i32); 1836 unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; 1837 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); 1838 return true; 1839 } 1840 } 1841 1842 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); 1843 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { 1844 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1845 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1846 } else 1847 Base = N; 1848 return true; // [r+0] 1849 } 1850 1851 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be 1852 /// represented as an indexed [r+r] operation. 1853 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 1854 SDValue &Index, 1855 SelectionDAG &DAG) const { 1856 // Check to see if we can easily represent this as an [r+r] address. This 1857 // will fail if it thinks that the address is more profitably represented as 1858 // reg+imm, e.g. where imm = 0. 1859 if (SelectAddressRegReg(N, Base, Index, DAG)) 1860 return true; 1861 1862 // If the operand is an addition, always emit this as [r+r], since this is 1863 // better (for code size, and execution, as the memop does the add for free) 1864 // than emitting an explicit add. 1865 if (N.getOpcode() == ISD::ADD) { 1866 Base = N.getOperand(0); 1867 Index = N.getOperand(1); 1868 return true; 1869 } 1870 1871 // Otherwise, do it the hard way, using R0 as the base register. 1872 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1873 N.getValueType()); 1874 Index = N; 1875 return true; 1876 } 1877 1878 /// getPreIndexedAddressParts - returns true by value, base pointer and 1879 /// offset pointer and addressing mode by reference if the node's address 1880 /// can be legally represented as pre-indexed load / store address. 1881 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 1882 SDValue &Offset, 1883 ISD::MemIndexedMode &AM, 1884 SelectionDAG &DAG) const { 1885 if (DisablePPCPreinc) return false; 1886 1887 bool isLoad = true; 1888 SDValue Ptr; 1889 EVT VT; 1890 unsigned Alignment; 1891 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1892 Ptr = LD->getBasePtr(); 1893 VT = LD->getMemoryVT(); 1894 Alignment = LD->getAlignment(); 1895 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 1896 Ptr = ST->getBasePtr(); 1897 VT = ST->getMemoryVT(); 1898 Alignment = ST->getAlignment(); 1899 isLoad = false; 1900 } else 1901 return false; 1902 1903 // PowerPC doesn't have preinc load/store instructions for vectors (except 1904 // for QPX, which does have preinc r+r forms). 1905 if (VT.isVector()) { 1906 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { 1907 return false; 1908 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { 1909 AM = ISD::PRE_INC; 1910 return true; 1911 } 1912 } 1913 1914 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { 1915 1916 // Common code will reject creating a pre-inc form if the base pointer 1917 // is a frame index, or if N is a store and the base pointer is either 1918 // the same as or a predecessor of the value being stored. Check for 1919 // those situations here, and try with swapped Base/Offset instead. 
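// Illustrative case: for a store to (add FI, %idx), SelectAddressRegReg
// returns Base = FI and Offset = %idx; since the common code mentioned above
// refuses a frame-index base for a pre-inc access, the swap below describes
// the same [r+r] address with Base = %idx and Offset = FI instead.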
1920 bool Swap = false; 1921 1922 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) 1923 Swap = true; 1924 else if (!isLoad) { 1925 SDValue Val = cast<StoreSDNode>(N)->getValue(); 1926 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) 1927 Swap = true; 1928 } 1929 1930 if (Swap) 1931 std::swap(Base, Offset); 1932 1933 AM = ISD::PRE_INC; 1934 return true; 1935 } 1936 1937 // LDU/STU can only handle immediates that are a multiple of 4. 1938 if (VT != MVT::i64) { 1939 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) 1940 return false; 1941 } else { 1942 // LDU/STU need an address with at least 4-byte alignment. 1943 if (Alignment < 4) 1944 return false; 1945 1946 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) 1947 return false; 1948 } 1949 1950 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1951 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 1952 // sext i32 to i64 when addr mode is r+i. 1953 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 1954 LD->getExtensionType() == ISD::SEXTLOAD && 1955 isa<ConstantSDNode>(Offset)) 1956 return false; 1957 } 1958 1959 AM = ISD::PRE_INC; 1960 return true; 1961 } 1962 1963 //===----------------------------------------------------------------------===// 1964 // LowerOperation implementation 1965 //===----------------------------------------------------------------------===// 1966 1967 /// GetLabelAccessInfo - Return true if we should reference labels using a 1968 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. 1969 static bool GetLabelAccessInfo(const TargetMachine &TM, 1970 const PPCSubtarget &Subtarget, 1971 unsigned &HiOpFlags, unsigned &LoOpFlags, 1972 const GlobalValue *GV = nullptr) { 1973 HiOpFlags = PPCII::MO_HA; 1974 LoOpFlags = PPCII::MO_LO; 1975 1976 // Don't use the pic base if not in PIC relocation model. 1977 bool isPIC = TM.getRelocationModel() == Reloc::PIC_; 1978 1979 if (isPIC) { 1980 HiOpFlags |= PPCII::MO_PIC_FLAG; 1981 LoOpFlags |= PPCII::MO_PIC_FLAG; 1982 } 1983 1984 // If this is a reference to a global value that requires a non-lazy-ptr, make 1985 // sure that instruction lowering adds it. 1986 if (GV && Subtarget.hasLazyResolverStub(GV)) { 1987 HiOpFlags |= PPCII::MO_NLP_FLAG; 1988 LoOpFlags |= PPCII::MO_NLP_FLAG; 1989 1990 if (GV->hasHiddenVisibility()) { 1991 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1992 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1993 } 1994 } 1995 1996 return isPIC; 1997 } 1998 1999 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 2000 SelectionDAG &DAG) { 2001 SDLoc DL(HiPart); 2002 EVT PtrVT = HiPart.getValueType(); 2003 SDValue Zero = DAG.getConstant(0, DL, PtrVT); 2004 2005 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 2006 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 2007 2008 // With PIC, the first instruction is actually "GR+hi(&G)". 2009 if (isPIC) 2010 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 2011 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 2012 2013 // Generate non-pic code that has direct accesses to the constant pool. 2014 // The address of the global is just (hi(&g)+lo(&g)). 
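// On ELF this typically selects to lis/addi: an addis of the sym@ha part
// followed by an addi of the sym@l part. @ha rather than @h is used so the
// high half pre-compensates for the sign extension of the low 16-bit
// immediate.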
2015 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 2016 } 2017 2018 static void setUsesTOCBasePtr(MachineFunction &MF) { 2019 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2020 FuncInfo->setUsesTOCBasePtr(); 2021 } 2022 2023 static void setUsesTOCBasePtr(SelectionDAG &DAG) { 2024 setUsesTOCBasePtr(DAG.getMachineFunction()); 2025 } 2026 2027 static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit, 2028 SDValue GA) { 2029 EVT VT = Is64Bit ? MVT::i64 : MVT::i32; 2030 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : 2031 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); 2032 2033 SDValue Ops[] = { GA, Reg }; 2034 return DAG.getMemIntrinsicNode( 2035 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, 2036 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, 2037 false, 0); 2038 } 2039 2040 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 2041 SelectionDAG &DAG) const { 2042 EVT PtrVT = Op.getValueType(); 2043 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2044 const Constant *C = CP->getConstVal(); 2045 2046 // 64-bit SVR4 ABI code is always position-independent. 2047 // The actual address of the GlobalValue is stored in the TOC. 2048 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2049 setUsesTOCBasePtr(DAG); 2050 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 2051 return getTOCEntry(DAG, SDLoc(CP), true, GA); 2052 } 2053 2054 unsigned MOHiFlag, MOLoFlag; 2055 bool isPIC = 2056 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag); 2057 2058 if (isPIC && Subtarget.isSVR4ABI()) { 2059 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 2060 PPCII::MO_PIC_FLAG); 2061 return getTOCEntry(DAG, SDLoc(CP), false, GA); 2062 } 2063 2064 SDValue CPIHi = 2065 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 2066 SDValue CPILo = 2067 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 2068 return LowerLabelRef(CPIHi, CPILo, isPIC, DAG); 2069 } 2070 2071 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 2072 EVT PtrVT = Op.getValueType(); 2073 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2074 2075 // 64-bit SVR4 ABI code is always position-independent. 2076 // The actual address of the GlobalValue is stored in the TOC. 2077 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2078 setUsesTOCBasePtr(DAG); 2079 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 2080 return getTOCEntry(DAG, SDLoc(JT), true, GA); 2081 } 2082 2083 unsigned MOHiFlag, MOLoFlag; 2084 bool isPIC = 2085 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag); 2086 2087 if (isPIC && Subtarget.isSVR4ABI()) { 2088 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 2089 PPCII::MO_PIC_FLAG); 2090 return getTOCEntry(DAG, SDLoc(GA), false, GA); 2091 } 2092 2093 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 2094 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 2095 return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); 2096 } 2097 2098 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 2099 SelectionDAG &DAG) const { 2100 EVT PtrVT = Op.getValueType(); 2101 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); 2102 const BlockAddress *BA = BASDN->getBlockAddress(); 2103 2104 // 64-bit SVR4 ABI code is always position-independent. 2105 // The actual BlockAddress is stored in the TOC. 
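// Sketch of what follows (assumption about later selection): the TOC_ENTRY
// node built by getTOCEntry is normally turned into a TOC-relative load via
// r2, e.g. an addis/ld pair under the medium code model, so no hi/lo pair is
// needed on 64-bit SVR4.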
2106 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2107 setUsesTOCBasePtr(DAG); 2108 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); 2109 return getTOCEntry(DAG, SDLoc(BASDN), true, GA); 2110 } 2111 2112 unsigned MOHiFlag, MOLoFlag; 2113 bool isPIC = 2114 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag); 2115 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 2116 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 2117 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); 2118 } 2119 2120 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 2121 SelectionDAG &DAG) const { 2122 2123 // FIXME: TLS addresses currently use medium model code sequences, 2124 // which is the most useful form. Eventually support for small and 2125 // large models could be added if users need it, at the cost of 2126 // additional complexity. 2127 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2128 if (DAG.getTarget().Options.EmulatedTLS) 2129 return LowerToTLSEmulatedModel(GA, DAG); 2130 2131 SDLoc dl(GA); 2132 const GlobalValue *GV = GA->getGlobal(); 2133 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2134 bool is64bit = Subtarget.isPPC64(); 2135 const Module *M = DAG.getMachineFunction().getFunction()->getParent(); 2136 PICLevel::Level picLevel = M->getPICLevel(); 2137 2138 TLSModel::Model Model = getTargetMachine().getTLSModel(GV); 2139 2140 if (Model == TLSModel::LocalExec) { 2141 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2142 PPCII::MO_TPREL_HA); 2143 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2144 PPCII::MO_TPREL_LO); 2145 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 2146 is64bit ? MVT::i64 : MVT::i32); 2147 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 2148 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 2149 } 2150 2151 if (Model == TLSModel::InitialExec) { 2152 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2153 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2154 PPCII::MO_TLS); 2155 SDValue GOTPtr; 2156 if (is64bit) { 2157 setUsesTOCBasePtr(DAG); 2158 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2159 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 2160 PtrVT, GOTReg, TGA); 2161 } else 2162 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); 2163 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 2164 PtrVT, TGA, GOTPtr); 2165 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 2166 } 2167 2168 if (Model == TLSModel::GeneralDynamic) { 2169 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2170 SDValue GOTPtr; 2171 if (is64bit) { 2172 setUsesTOCBasePtr(DAG); 2173 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2174 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 2175 GOTReg, TGA); 2176 } else { 2177 if (picLevel == PICLevel::Small) 2178 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2179 else 2180 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2181 } 2182 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, 2183 GOTPtr, TGA, TGA); 2184 } 2185 2186 if (Model == TLSModel::LocalDynamic) { 2187 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2188 SDValue GOTPtr; 2189 if (is64bit) { 2190 setUsesTOCBasePtr(DAG); 2191 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2192 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 2193 GOTReg, TGA); 2194 } else { 2195 if (picLevel == PICLevel::Small) 2196 GOTPtr = 
DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2197 else 2198 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2199 } 2200 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, 2201 PtrVT, GOTPtr, TGA, TGA); 2202 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, 2203 PtrVT, TLSAddr, TGA); 2204 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 2205 } 2206 2207 llvm_unreachable("Unknown TLS model!"); 2208 } 2209 2210 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 2211 SelectionDAG &DAG) const { 2212 EVT PtrVT = Op.getValueType(); 2213 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 2214 SDLoc DL(GSDN); 2215 const GlobalValue *GV = GSDN->getGlobal(); 2216 2217 // 64-bit SVR4 ABI code is always position-independent. 2218 // The actual address of the GlobalValue is stored in the TOC. 2219 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2220 setUsesTOCBasePtr(DAG); 2221 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 2222 return getTOCEntry(DAG, DL, true, GA); 2223 } 2224 2225 unsigned MOHiFlag, MOLoFlag; 2226 bool isPIC = 2227 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV); 2228 2229 if (isPIC && Subtarget.isSVR4ABI()) { 2230 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 2231 GSDN->getOffset(), 2232 PPCII::MO_PIC_FLAG); 2233 return getTOCEntry(DAG, DL, false, GA); 2234 } 2235 2236 SDValue GAHi = 2237 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 2238 SDValue GALo = 2239 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 2240 2241 SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG); 2242 2243 // If the global reference is actually to a non-lazy-pointer, we have to do an 2244 // extra load to get the address of the global. 2245 if (MOHiFlag & PPCII::MO_NLP_FLAG) 2246 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), 2247 false, false, false, 0); 2248 return Ptr; 2249 } 2250 2251 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 2252 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2253 SDLoc dl(Op); 2254 2255 if (Op.getValueType() == MVT::v2i64) { 2256 // When the operands themselves are v2i64 values, we need to do something 2257 // special because VSX has no underlying comparison operations for these. 2258 if (Op.getOperand(0).getValueType() == MVT::v2i64) { 2259 // Equality can be handled by casting to the legal type for Altivec 2260 // comparisons, everything else needs to be expanded. 2261 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2262 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, 2263 DAG.getSetCC(dl, MVT::v4i32, 2264 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), 2265 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), 2266 CC)); 2267 } 2268 2269 return SDValue(); 2270 } 2271 2272 // We handle most of these in the usual way. 2273 return Op; 2274 } 2275 2276 // If we're comparing for equality to zero, expose the fact that this is 2277 // implented as a ctlz/srl pair on ppc, so that the dag combiner can 2278 // fold the new nodes. 
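// i.e. (seteq X, 0) is rewritten below as (srl (ctlz X), log2(bitwidth));
// cntlzw/cntlzd produce the full bit width only for a zero input, so the
// shift yields 1 exactly when X == 0 and 0 otherwise.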
2279 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 2280 if (C->isNullValue() && CC == ISD::SETEQ) { 2281 EVT VT = Op.getOperand(0).getValueType(); 2282 SDValue Zext = Op.getOperand(0); 2283 if (VT.bitsLT(MVT::i32)) { 2284 VT = MVT::i32; 2285 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); 2286 } 2287 unsigned Log2b = Log2_32(VT.getSizeInBits()); 2288 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); 2289 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, 2290 DAG.getConstant(Log2b, dl, MVT::i32)); 2291 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); 2292 } 2293 // Leave comparisons against 0 and -1 alone for now, since they're usually 2294 // optimized. FIXME: revisit this when we can custom lower all setcc 2295 // optimizations. 2296 if (C->isAllOnesValue() || C->isNullValue()) 2297 return SDValue(); 2298 } 2299 2300 // If we have an integer seteq/setne, turn it into a compare against zero 2301 // by xor'ing the rhs with the lhs, which is faster than setting a 2302 // condition register, reading it back out, and masking the correct bit. The 2303 // normal approach here uses sub to do this instead of xor. Using xor exposes 2304 // the result to other bit-twiddling opportunities. 2305 EVT LHSVT = Op.getOperand(0).getValueType(); 2306 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 2307 EVT VT = Op.getValueType(); 2308 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 2309 Op.getOperand(1)); 2310 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); 2311 } 2312 return SDValue(); 2313 } 2314 2315 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, 2316 const PPCSubtarget &Subtarget) const { 2317 SDNode *Node = Op.getNode(); 2318 EVT VT = Node->getValueType(0); 2319 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 2320 SDValue InChain = Node->getOperand(0); 2321 SDValue VAListPtr = Node->getOperand(1); 2322 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2323 SDLoc dl(Node); 2324 2325 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 2326 2327 // gpr_index 2328 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2329 VAListPtr, MachinePointerInfo(SV), MVT::i8, 2330 false, false, false, 0); 2331 InChain = GprIndex.getValue(1); 2332 2333 if (VT == MVT::i64) { 2334 // Check if GprIndex is even 2335 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 2336 DAG.getConstant(1, dl, MVT::i32)); 2337 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 2338 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); 2339 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 2340 DAG.getConstant(1, dl, MVT::i32)); 2341 // Align GprIndex to be even if it isn't 2342 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 2343 GprIndex); 2344 } 2345 2346 // fpr index is 1 byte after gpr 2347 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2348 DAG.getConstant(1, dl, MVT::i32)); 2349 2350 // fpr 2351 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2352 FprPtr, MachinePointerInfo(SV), MVT::i8, 2353 false, false, false, 0); 2354 InChain = FprIndex.getValue(1); 2355 2356 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2357 DAG.getConstant(8, dl, MVT::i32)); 2358 2359 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2360 DAG.getConstant(4, dl, MVT::i32)); 2361 2362 // areas 2363 SDValue OverflowArea = 
DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, 2364 MachinePointerInfo(), false, false, 2365 false, 0); 2366 InChain = OverflowArea.getValue(1); 2367 2368 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, 2369 MachinePointerInfo(), false, false, 2370 false, 0); 2371 InChain = RegSaveArea.getValue(1); 2372 2373 // select overflow_area if index > 8 2374 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 2375 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); 2376 2377 // adjustment constant gpr_index * 4/8 2378 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 2379 VT.isInteger() ? GprIndex : FprIndex, 2380 DAG.getConstant(VT.isInteger() ? 4 : 8, dl, 2381 MVT::i32)); 2382 2383 // OurReg = RegSaveArea + RegConstant 2384 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 2385 RegConstant); 2386 2387 // Floating types are 32 bytes into RegSaveArea 2388 if (VT.isFloatingPoint()) 2389 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 2390 DAG.getConstant(32, dl, MVT::i32)); 2391 2392 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 2393 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 2394 VT.isInteger() ? GprIndex : FprIndex, 2395 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, 2396 MVT::i32)); 2397 2398 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 2399 VT.isInteger() ? VAListPtr : FprPtr, 2400 MachinePointerInfo(SV), 2401 MVT::i8, false, false, 0); 2402 2403 // determine if we should load from reg_save_area or overflow_area 2404 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 2405 2406 // increase overflow_area by 4/8 if gpr/fpr > 8 2407 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 2408 DAG.getConstant(VT.isInteger() ? 
4 : 8, 2409 dl, MVT::i32)); 2410 2411 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 2412 OverflowAreaPlusN); 2413 2414 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, 2415 OverflowAreaPtr, 2416 MachinePointerInfo(), 2417 MVT::i32, false, false, 0); 2418 2419 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 2420 false, false, false, 0); 2421 } 2422 2423 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, 2424 const PPCSubtarget &Subtarget) const { 2425 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 2426 2427 // We have to copy the entire va_list struct: 2428 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 2429 return DAG.getMemcpy(Op.getOperand(0), Op, 2430 Op.getOperand(1), Op.getOperand(2), 2431 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, 2432 false, MachinePointerInfo(), MachinePointerInfo()); 2433 } 2434 2435 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 2436 SelectionDAG &DAG) const { 2437 return Op.getOperand(0); 2438 } 2439 2440 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 2441 SelectionDAG &DAG) const { 2442 SDValue Chain = Op.getOperand(0); 2443 SDValue Trmp = Op.getOperand(1); // trampoline 2444 SDValue FPtr = Op.getOperand(2); // nested function 2445 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 2446 SDLoc dl(Op); 2447 2448 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 2449 bool isPPC64 = (PtrVT == MVT::i64); 2450 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 2451 2452 TargetLowering::ArgListTy Args; 2453 TargetLowering::ArgListEntry Entry; 2454 2455 Entry.Ty = IntPtrTy; 2456 Entry.Node = Trmp; Args.push_back(Entry); 2457 2458 // TrampSize == (isPPC64 ? 48 : 40); 2459 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, 2460 isPPC64 ? MVT::i64 : MVT::i32); 2461 Args.push_back(Entry); 2462 2463 Entry.Node = FPtr; Args.push_back(Entry); 2464 Entry.Node = Nest; Args.push_back(Entry); 2465 2466 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 2467 TargetLowering::CallLoweringInfo CLI(DAG); 2468 CLI.setDebugLoc(dl).setChain(Chain) 2469 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), 2470 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 2471 std::move(Args), 0); 2472 2473 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2474 return CallResult.second; 2475 } 2476 2477 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, 2478 const PPCSubtarget &Subtarget) const { 2479 MachineFunction &MF = DAG.getMachineFunction(); 2480 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2481 2482 SDLoc dl(Op); 2483 2484 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 2485 // vastart just stores the address of the VarArgsFrameIndex slot into the 2486 // memory location argument. 2487 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); 2488 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2489 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2490 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2491 MachinePointerInfo(SV), 2492 false, false, 0); 2493 } 2494 2495 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 2496 // We suppose the given va_list is already allocated. 
2497 // 2498 // typedef struct { 2499 // char gpr; /* index into the array of 8 GPRs 2500 // * stored in the register save area 2501 // * gpr=0 corresponds to r3, 2502 // * gpr=1 to r4, etc. 2503 // */ 2504 // char fpr; /* index into the array of 8 FPRs 2505 // * stored in the register save area 2506 // * fpr=0 corresponds to f1, 2507 // * fpr=1 to f2, etc. 2508 // */ 2509 // char *overflow_arg_area; 2510 // /* location on stack that holds 2511 // * the next overflow argument 2512 // */ 2513 // char *reg_save_area; 2514 // /* where r3:r10 and f1:f8 (if saved) 2515 // * are stored 2516 // */ 2517 // } va_list[1]; 2518 2519 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); 2520 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); 2521 2522 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); 2523 2524 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 2525 PtrVT); 2526 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2527 PtrVT); 2528 2529 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 2530 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); 2531 2532 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 2533 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); 2534 2535 uint64_t FPROffset = 1; 2536 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); 2537 2538 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2539 2540 // Store first byte : number of int regs 2541 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, 2542 Op.getOperand(1), 2543 MachinePointerInfo(SV), 2544 MVT::i8, false, false, 0); 2545 uint64_t nextOffset = FPROffset; 2546 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 2547 ConstFPROffset); 2548 2549 // Store second byte : number of float regs 2550 SDValue secondStore = 2551 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 2552 MachinePointerInfo(SV, nextOffset), MVT::i8, 2553 false, false, 0); 2554 nextOffset += StackOffset; 2555 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 2556 2557 // Store second word : arguments given on stack 2558 SDValue thirdStore = 2559 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 2560 MachinePointerInfo(SV, nextOffset), 2561 false, false, 0); 2562 nextOffset += FrameOffset; 2563 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 2564 2565 // Store third word : arguments given in registers 2566 return DAG.getStore(thirdStore, dl, FR, nextPtr, 2567 MachinePointerInfo(SV, nextOffset), 2568 false, false, 0); 2569 2570 } 2571 2572 #include "PPCGenCallingConv.inc" 2573 2574 // Function whose sole purpose is to kill compiler warnings 2575 // stemming from unused functions included from PPCGenCallingConv.inc. 2576 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { 2577 return Flag ? 
CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; 2578 } 2579 2580 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 2581 CCValAssign::LocInfo &LocInfo, 2582 ISD::ArgFlagsTy &ArgFlags, 2583 CCState &State) { 2584 return true; 2585 } 2586 2587 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 2588 MVT &LocVT, 2589 CCValAssign::LocInfo &LocInfo, 2590 ISD::ArgFlagsTy &ArgFlags, 2591 CCState &State) { 2592 static const MCPhysReg ArgRegs[] = { 2593 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2594 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2595 }; 2596 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2597 2598 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2599 2600 // Skip one register if the first unallocated register has an even register 2601 // number and there are still argument registers available which have not been 2602 // allocated yet. RegNum is actually an index into ArgRegs, which means we 2603 // need to skip a register if RegNum is odd. 2604 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 2605 State.AllocateReg(ArgRegs[RegNum]); 2606 } 2607 2608 // Always return false here, as this function only makes sure that the first 2609 // unallocated register has an odd register number and does not actually 2610 // allocate a register for the current argument. 2611 return false; 2612 } 2613 2614 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 2615 MVT &LocVT, 2616 CCValAssign::LocInfo &LocInfo, 2617 ISD::ArgFlagsTy &ArgFlags, 2618 CCState &State) { 2619 static const MCPhysReg ArgRegs[] = { 2620 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2621 PPC::F8 2622 }; 2623 2624 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2625 2626 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2627 2628 // If there is only one Floating-point register left we need to put both f64 2629 // values of a split ppc_fp128 value on the stack. 2630 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 2631 State.AllocateReg(ArgRegs[RegNum]); 2632 } 2633 2634 // Always return false here, as this function only makes sure that the two f64 2635 // values a ppc_fp128 value is split into are both passed in registers or both 2636 // passed on the stack and does not actually allocate a register for the 2637 // current argument. 2638 return false; 2639 } 2640 2641 /// FPR - The set of FP registers that should be allocated for arguments, 2642 /// on Darwin. 2643 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, 2644 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, 2645 PPC::F11, PPC::F12, PPC::F13}; 2646 2647 /// QFPR - The set of QPX registers that should be allocated for arguments. 2648 static const MCPhysReg QFPR[] = { 2649 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, 2650 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; 2651 2652 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 2653 /// the stack. 2654 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2655 unsigned PtrByteSize) { 2656 unsigned ArgSize = ArgVT.getStoreSize(); 2657 if (Flags.isByVal()) 2658 ArgSize = Flags.getByValSize(); 2659 2660 // Round up to multiples of the pointer size, except for array members, 2661 // which are always packed. 
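// e.g. with PtrByteSize == 8, a 12-byte by-value aggregate reserves a 16-byte
// slot, while a 12-byte piece of a split array (isInConsecutiveRegs) keeps
// its packed 12-byte size.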
2662 if (!Flags.isInConsecutiveRegs()) 2663 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2664 2665 return ArgSize; 2666 } 2667 2668 /// CalculateStackSlotAlignment - Calculates the alignment of this argument 2669 /// on the stack. 2670 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 2671 ISD::ArgFlagsTy Flags, 2672 unsigned PtrByteSize) { 2673 unsigned Align = PtrByteSize; 2674 2675 // Altivec parameters are padded to a 16 byte boundary. 2676 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2677 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2678 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2679 ArgVT == MVT::v1i128) 2680 Align = 16; 2681 // QPX vector types stored in double-precision are padded to a 32 byte 2682 // boundary. 2683 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) 2684 Align = 32; 2685 2686 // ByVal parameters are aligned as requested. 2687 if (Flags.isByVal()) { 2688 unsigned BVAlign = Flags.getByValAlign(); 2689 if (BVAlign > PtrByteSize) { 2690 if (BVAlign % PtrByteSize != 0) 2691 llvm_unreachable( 2692 "ByVal alignment is not a multiple of the pointer size"); 2693 2694 Align = BVAlign; 2695 } 2696 } 2697 2698 // Array members are always packed to their original alignment. 2699 if (Flags.isInConsecutiveRegs()) { 2700 // If the array member was split into multiple registers, the first 2701 // needs to be aligned to the size of the full type. (Except for 2702 // ppcf128, which is only aligned as its f64 components.) 2703 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 2704 Align = OrigVT.getStoreSize(); 2705 else 2706 Align = ArgVT.getStoreSize(); 2707 } 2708 2709 return Align; 2710 } 2711 2712 /// CalculateStackSlotUsed - Return whether this argument will use its 2713 /// stack slot (instead of being passed in registers). ArgOffset, 2714 /// AvailableFPRs, and AvailableVRs must hold the current argument 2715 /// position, and will be updated to account for this argument. 2716 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 2717 ISD::ArgFlagsTy Flags, 2718 unsigned PtrByteSize, 2719 unsigned LinkageSize, 2720 unsigned ParamAreaSize, 2721 unsigned &ArgOffset, 2722 unsigned &AvailableFPRs, 2723 unsigned &AvailableVRs, bool HasQPX) { 2724 bool UseMemory = false; 2725 2726 // Respect alignment of argument on the stack. 2727 unsigned Align = 2728 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 2729 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2730 // If there's no space left in the argument save area, we must 2731 // use memory (this check also catches zero-sized arguments). 2732 if (ArgOffset >= LinkageSize + ParamAreaSize) 2733 UseMemory = true; 2734 2735 // Allocate argument on the stack. 2736 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2737 if (Flags.isInConsecutiveRegsLast()) 2738 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2739 // If we overran the argument save area, we must use memory 2740 // (this check catches arguments passed partially in memory) 2741 if (ArgOffset > LinkageSize + ParamAreaSize) 2742 UseMemory = true; 2743 2744 // However, if the argument is actually passed in an FPR or a VR, 2745 // we don't use memory after all. 2746 if (!Flags.isByVal()) { 2747 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || 2748 // QPX registers overlap with the scalar FP registers. 
2749 (HasQPX && (ArgVT == MVT::v4f32 || 2750 ArgVT == MVT::v4f64 || 2751 ArgVT == MVT::v4i1))) 2752 if (AvailableFPRs > 0) { 2753 --AvailableFPRs; 2754 return false; 2755 } 2756 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2757 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2758 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2759 ArgVT == MVT::v1i128) 2760 if (AvailableVRs > 0) { 2761 --AvailableVRs; 2762 return false; 2763 } 2764 } 2765 2766 return UseMemory; 2767 } 2768 2769 /// EnsureStackAlignment - Round stack frame size up from NumBytes to 2770 /// ensure minimum alignment required for target. 2771 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, 2772 unsigned NumBytes) { 2773 unsigned TargetAlign = Lowering->getStackAlignment(); 2774 unsigned AlignMask = TargetAlign - 1; 2775 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2776 return NumBytes; 2777 } 2778 2779 SDValue 2780 PPCTargetLowering::LowerFormalArguments(SDValue Chain, 2781 CallingConv::ID CallConv, bool isVarArg, 2782 const SmallVectorImpl<ISD::InputArg> 2783 &Ins, 2784 SDLoc dl, SelectionDAG &DAG, 2785 SmallVectorImpl<SDValue> &InVals) 2786 const { 2787 if (Subtarget.isSVR4ABI()) { 2788 if (Subtarget.isPPC64()) 2789 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 2790 dl, DAG, InVals); 2791 else 2792 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 2793 dl, DAG, InVals); 2794 } else { 2795 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 2796 dl, DAG, InVals); 2797 } 2798 } 2799 2800 SDValue 2801 PPCTargetLowering::LowerFormalArguments_32SVR4( 2802 SDValue Chain, 2803 CallingConv::ID CallConv, bool isVarArg, 2804 const SmallVectorImpl<ISD::InputArg> 2805 &Ins, 2806 SDLoc dl, SelectionDAG &DAG, 2807 SmallVectorImpl<SDValue> &InVals) const { 2808 2809 // 32-bit SVR4 ABI Stack Frame Layout: 2810 // +-----------------------------------+ 2811 // +--> | Back chain | 2812 // | +-----------------------------------+ 2813 // | | Floating-point register save area | 2814 // | +-----------------------------------+ 2815 // | | General register save area | 2816 // | +-----------------------------------+ 2817 // | | CR save word | 2818 // | +-----------------------------------+ 2819 // | | VRSAVE save word | 2820 // | +-----------------------------------+ 2821 // | | Alignment padding | 2822 // | +-----------------------------------+ 2823 // | | Vector register save area | 2824 // | +-----------------------------------+ 2825 // | | Local variable space | 2826 // | +-----------------------------------+ 2827 // | | Parameter list area | 2828 // | +-----------------------------------+ 2829 // | | LR save word | 2830 // | +-----------------------------------+ 2831 // SP--> +--- | Back chain | 2832 // +-----------------------------------+ 2833 // 2834 // Specifications: 2835 // System V Application Binary Interface PowerPC Processor Supplement 2836 // AltiVec Technology Programming Interface Manual 2837 2838 MachineFunction &MF = DAG.getMachineFunction(); 2839 MachineFrameInfo *MFI = MF.getFrameInfo(); 2840 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2841 2842 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); 2843 // Potential tail calls could cause overwriting of argument stack slots. 2844 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2845 (CallConv == CallingConv::Fast)); 2846 unsigned PtrByteSize = 4; 2847 2848 // Assign locations to all of the incoming arguments. 
2849 SmallVector<CCValAssign, 16> ArgLocs; 2850 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 2851 *DAG.getContext()); 2852 2853 // Reserve space for the linkage area on the stack. 2854 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 2855 CCInfo.AllocateStack(LinkageSize, PtrByteSize); 2856 2857 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 2858 2859 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2860 CCValAssign &VA = ArgLocs[i]; 2861 2862 // Arguments stored in registers. 2863 if (VA.isRegLoc()) { 2864 const TargetRegisterClass *RC; 2865 EVT ValVT = VA.getValVT(); 2866 2867 switch (ValVT.getSimpleVT().SimpleTy) { 2868 default: 2869 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 2870 case MVT::i1: 2871 case MVT::i32: 2872 RC = &PPC::GPRCRegClass; 2873 break; 2874 case MVT::f32: 2875 if (Subtarget.hasP8Vector()) 2876 RC = &PPC::VSSRCRegClass; 2877 else 2878 RC = &PPC::F4RCRegClass; 2879 break; 2880 case MVT::f64: 2881 if (Subtarget.hasVSX()) 2882 RC = &PPC::VSFRCRegClass; 2883 else 2884 RC = &PPC::F8RCRegClass; 2885 break; 2886 case MVT::v16i8: 2887 case MVT::v8i16: 2888 case MVT::v4i32: 2889 RC = &PPC::VRRCRegClass; 2890 break; 2891 case MVT::v4f32: 2892 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; 2893 break; 2894 case MVT::v2f64: 2895 case MVT::v2i64: 2896 RC = &PPC::VSHRCRegClass; 2897 break; 2898 case MVT::v4f64: 2899 RC = &PPC::QFRCRegClass; 2900 break; 2901 case MVT::v4i1: 2902 RC = &PPC::QBRCRegClass; 2903 break; 2904 } 2905 2906 // Transform the arguments stored in physical registers into virtual ones. 2907 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2908 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, 2909 ValVT == MVT::i1 ? MVT::i32 : ValVT); 2910 2911 if (ValVT == MVT::i1) 2912 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); 2913 2914 InVals.push_back(ArgValue); 2915 } else { 2916 // Argument stored in memory. 2917 assert(VA.isMemLoc()); 2918 2919 unsigned ArgSize = VA.getLocVT().getStoreSize(); 2920 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 2921 isImmutable); 2922 2923 // Create load nodes to retrieve arguments from the stack. 2924 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2925 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2926 MachinePointerInfo(), 2927 false, false, false, 0)); 2928 } 2929 } 2930 2931 // Assign locations to all of the incoming aggregate by value arguments. 2932 // Aggregates passed by value are stored in the local variable space of the 2933 // caller's stack frame, right above the parameter list area. 2934 SmallVector<CCValAssign, 16> ByValArgLocs; 2935 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2936 ByValArgLocs, *DAG.getContext()); 2937 2938 // Reserve stack space for the allocations in CCInfo. 2939 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 2940 2941 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 2942 2943 // Area that is at least reserved in the caller of this function. 2944 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 2945 MinReservedArea = std::max(MinReservedArea, LinkageSize); 2946 2947 // Set the size that is at least reserved in caller of this function. Tail 2948 // call optimized function's reserved stack space needs to be aligned so that 2949 // taking the difference between two stack areas will result in an aligned 2950 // stack. 
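  // For example, assuming a 16-byte target stack alignment, a computed
  // reserved area of 40 bytes is rounded up to 48 ((40 + 15) & ~15) by
  // EnsureStackAlignment below.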
2951 MinReservedArea = 2952 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 2953 FuncInfo->setMinReservedArea(MinReservedArea); 2954 2955 SmallVector<SDValue, 8> MemOps; 2956 2957 // If the function takes variable number of arguments, make a frame index for 2958 // the start of the first vararg value... for expansion of llvm.va_start. 2959 if (isVarArg) { 2960 static const MCPhysReg GPArgRegs[] = { 2961 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2962 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2963 }; 2964 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 2965 2966 static const MCPhysReg FPArgRegs[] = { 2967 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2968 PPC::F8 2969 }; 2970 unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 2971 2972 if (Subtarget.useSoftFloat()) 2973 NumFPArgRegs = 0; 2974 2975 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); 2976 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); 2977 2978 // Make room for NumGPArgRegs and NumFPArgRegs. 2979 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 2980 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; 2981 2982 FuncInfo->setVarArgsStackOffset( 2983 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2984 CCInfo.getNextStackOffset(), true)); 2985 2986 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 2987 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2988 2989 // The fixed integer arguments of a variadic function are stored to the 2990 // VarArgsFrameIndex on the stack so that they may be loaded by deferencing 2991 // the result of va_next. 2992 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 2993 // Get an existing live-in vreg, or add a new one. 2994 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 2995 if (!VReg) 2996 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 2997 2998 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2999 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3000 MachinePointerInfo(), false, false, 0); 3001 MemOps.push_back(Store); 3002 // Increment the address by four for the next argument to store 3003 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 3004 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3005 } 3006 3007 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 3008 // is set. 3009 // The double arguments are stored to the VarArgsFrameIndex 3010 // on the stack. 3011 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 3012 // Get an existing live-in vreg, or add a new one. 3013 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 3014 if (!VReg) 3015 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 3016 3017 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 3018 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3019 MachinePointerInfo(), false, false, 0); 3020 MemOps.push_back(Store); 3021 // Increment the address by eight for the next argument to store 3022 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, 3023 PtrVT); 3024 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3025 } 3026 } 3027 3028 if (!MemOps.empty()) 3029 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3030 3031 return Chain; 3032 } 3033 3034 // PPC64 passes i8, i16, and i32 values in i64 registers. 
Promote 3035 // value to MVT::i64 and then truncate to the correct register size. 3036 SDValue 3037 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, 3038 SelectionDAG &DAG, SDValue ArgVal, 3039 SDLoc dl) const { 3040 if (Flags.isSExt()) 3041 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 3042 DAG.getValueType(ObjectVT)); 3043 else if (Flags.isZExt()) 3044 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 3045 DAG.getValueType(ObjectVT)); 3046 3047 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); 3048 } 3049 3050 SDValue 3051 PPCTargetLowering::LowerFormalArguments_64SVR4( 3052 SDValue Chain, 3053 CallingConv::ID CallConv, bool isVarArg, 3054 const SmallVectorImpl<ISD::InputArg> 3055 &Ins, 3056 SDLoc dl, SelectionDAG &DAG, 3057 SmallVectorImpl<SDValue> &InVals) const { 3058 // TODO: add description of PPC stack frame format, or at least some docs. 3059 // 3060 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3061 bool isLittleEndian = Subtarget.isLittleEndian(); 3062 MachineFunction &MF = DAG.getMachineFunction(); 3063 MachineFrameInfo *MFI = MF.getFrameInfo(); 3064 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3065 3066 assert(!(CallConv == CallingConv::Fast && isVarArg) && 3067 "fastcc not supported on varargs functions"); 3068 3069 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); 3070 // Potential tail calls could cause overwriting of argument stack slots. 3071 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3072 (CallConv == CallingConv::Fast)); 3073 unsigned PtrByteSize = 8; 3074 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3075 3076 static const MCPhysReg GPR[] = { 3077 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3078 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3079 }; 3080 static const MCPhysReg VR[] = { 3081 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3082 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3083 }; 3084 static const MCPhysReg VSRH[] = { 3085 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 3086 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 3087 }; 3088 3089 const unsigned Num_GPR_Regs = array_lengthof(GPR); 3090 const unsigned Num_FPR_Regs = 13; 3091 const unsigned Num_VR_Regs = array_lengthof(VR); 3092 const unsigned Num_QFPR_Regs = Num_FPR_Regs; 3093 3094 // Do a first pass over the arguments to determine whether the ABI 3095 // guarantees that our caller has allocated the parameter save area 3096 // on its stack frame. In the ELFv1 ABI, this is always the case; 3097 // in the ELFv2 ABI, it is true if this is a vararg function or if 3098 // any parameter is located in a stack slot. 3099 3100 bool HasParameterArea = !isELFv2ABI || isVarArg; 3101 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; 3102 unsigned NumBytes = LinkageSize; 3103 unsigned AvailableFPRs = Num_FPR_Regs; 3104 unsigned AvailableVRs = Num_VR_Regs; 3105 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 3106 if (Ins[i].Flags.isNest()) 3107 continue; 3108 3109 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, 3110 PtrByteSize, LinkageSize, ParamAreaSize, 3111 NumBytes, AvailableFPRs, AvailableVRs, 3112 Subtarget.hasQPX())) 3113 HasParameterArea = true; 3114 } 3115 3116 // Add DAG nodes to load the arguments or copy them out of registers. On 3117 // entry to a function on PPC, the arguments start after the linkage area, 3118 // although the first ones are often in registers. 
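  // The bookkeeping below starts ArgOffset at the linkage size (e.g. 32 bytes
  // under ELFv2), so the first doubleword parameter slot sits immediately
  // after the linkage area; GPR_idx/FPR_idx/VR_idx track which physical
  // registers have already been consumed as ArgOffset advances.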
3119 3120 unsigned ArgOffset = LinkageSize; 3121 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3122 unsigned &QFPR_idx = FPR_idx; 3123 SmallVector<SDValue, 8> MemOps; 3124 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3125 unsigned CurArgIdx = 0; 3126 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3127 SDValue ArgVal; 3128 bool needsLoad = false; 3129 EVT ObjectVT = Ins[ArgNo].VT; 3130 EVT OrigVT = Ins[ArgNo].ArgVT; 3131 unsigned ObjSize = ObjectVT.getStoreSize(); 3132 unsigned ArgSize = ObjSize; 3133 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3134 if (Ins[ArgNo].isOrigArg()) { 3135 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3136 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3137 } 3138 // We re-align the argument offset for each argument, except when using the 3139 // fast calling convention, when we need to make sure we do that only when 3140 // we'll actually use a stack slot. 3141 unsigned CurArgOffset, Align; 3142 auto ComputeArgOffset = [&]() { 3143 /* Respect alignment of argument on the stack. */ 3144 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); 3145 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 3146 CurArgOffset = ArgOffset; 3147 }; 3148 3149 if (CallConv != CallingConv::Fast) { 3150 ComputeArgOffset(); 3151 3152 /* Compute GPR index associated with argument offset. */ 3153 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3154 GPR_idx = std::min(GPR_idx, Num_GPR_Regs); 3155 } 3156 3157 // FIXME the codegen can be much improved in some cases. 3158 // We do not have to keep everything in memory. 3159 if (Flags.isByVal()) { 3160 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3161 3162 if (CallConv == CallingConv::Fast) 3163 ComputeArgOffset(); 3164 3165 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3166 ObjSize = Flags.getByValSize(); 3167 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3168 // Empty aggregate parameters do not take up registers. Examples: 3169 // struct { } a; 3170 // union { } b; 3171 // int c[0]; 3172 // etc. However, we have to provide a place-holder in InVals, so 3173 // pretend we have an 8-byte item at the current address for that 3174 // purpose. 3175 if (!ObjSize) { 3176 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 3177 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3178 InVals.push_back(FIN); 3179 continue; 3180 } 3181 3182 // Create a stack object covering all stack doublewords occupied 3183 // by the argument. If the argument is (fully or partially) on 3184 // the stack, or if the argument is fully in registers but the 3185 // caller has allocated the parameter save anyway, we can refer 3186 // directly to the caller's stack frame. Otherwise, create a 3187 // local copy in our own frame. 3188 int FI; 3189 if (HasParameterArea || 3190 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) 3191 FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true); 3192 else 3193 FI = MFI->CreateStackObject(ArgSize, Align, false); 3194 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3195 3196 // Handle aggregates smaller than 8 bytes. 3197 if (ObjSize < PtrByteSize) { 3198 // The value of the object is its address, which differs from the 3199 // address of the enclosing doubleword on big-endian systems. 
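        // For example, a 2-byte aggregate passed on a big-endian target
        // occupies the last two bytes of its doubleword slot, so its address
        // is FIN + 6.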
3200 SDValue Arg = FIN; 3201 if (!isLittleEndian) { 3202 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); 3203 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); 3204 } 3205 InVals.push_back(Arg); 3206 3207 if (GPR_idx != Num_GPR_Regs) { 3208 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3209 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3210 SDValue Store; 3211 3212 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 3213 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 3214 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 3215 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, 3216 MachinePointerInfo(&*FuncArg), ObjType, 3217 false, false, 0); 3218 } else { 3219 // For sizes that don't fit a truncating store (3, 5, 6, 7), 3220 // store the whole register as-is to the parameter save area 3221 // slot. 3222 Store = 3223 DAG.getStore(Val.getValue(1), dl, Val, FIN, 3224 MachinePointerInfo(&*FuncArg), false, false, 0); 3225 } 3226 3227 MemOps.push_back(Store); 3228 } 3229 // Whether we copied from a register or not, advance the offset 3230 // into the parameter save area by a full doubleword. 3231 ArgOffset += PtrByteSize; 3232 continue; 3233 } 3234 3235 // The value of the object is its address, which is the address of 3236 // its first stack doubleword. 3237 InVals.push_back(FIN); 3238 3239 // Store whatever pieces of the object are in registers to memory. 3240 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3241 if (GPR_idx == Num_GPR_Regs) 3242 break; 3243 3244 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3245 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3246 SDValue Addr = FIN; 3247 if (j) { 3248 SDValue Off = DAG.getConstant(j, dl, PtrVT); 3249 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); 3250 } 3251 SDValue Store = 3252 DAG.getStore(Val.getValue(1), dl, Val, Addr, 3253 MachinePointerInfo(&*FuncArg, j), false, false, 0); 3254 MemOps.push_back(Store); 3255 ++GPR_idx; 3256 } 3257 ArgOffset += ArgSize; 3258 continue; 3259 } 3260 3261 switch (ObjectVT.getSimpleVT().SimpleTy) { 3262 default: llvm_unreachable("Unhandled argument type!"); 3263 case MVT::i1: 3264 case MVT::i32: 3265 case MVT::i64: 3266 if (Flags.isNest()) { 3267 // The 'nest' parameter, if any, is passed in R11. 3268 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); 3269 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3270 3271 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3272 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3273 3274 break; 3275 } 3276 3277 // These can be scalar arguments or elements of an integer array type 3278 // passed directly. Clang may use those instead of "byval" aggregate 3279 // types to avoid forcing arguments to memory unnecessarily. 3280 if (GPR_idx != Num_GPR_Regs) { 3281 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3282 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3283 3284 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3285 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3286 // value to MVT::i64 and then truncate to the correct register size. 
3287 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3288 } else { 3289 if (CallConv == CallingConv::Fast) 3290 ComputeArgOffset(); 3291 3292 needsLoad = true; 3293 ArgSize = PtrByteSize; 3294 } 3295 if (CallConv != CallingConv::Fast || needsLoad) 3296 ArgOffset += 8; 3297 break; 3298 3299 case MVT::f32: 3300 case MVT::f64: 3301 // These can be scalar arguments or elements of a float array type 3302 // passed directly. The latter are used to implement ELFv2 homogenous 3303 // float aggregates. 3304 if (FPR_idx != Num_FPR_Regs) { 3305 unsigned VReg; 3306 3307 if (ObjectVT == MVT::f32) 3308 VReg = MF.addLiveIn(FPR[FPR_idx], 3309 Subtarget.hasP8Vector() 3310 ? &PPC::VSSRCRegClass 3311 : &PPC::F4RCRegClass); 3312 else 3313 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() 3314 ? &PPC::VSFRCRegClass 3315 : &PPC::F8RCRegClass); 3316 3317 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3318 ++FPR_idx; 3319 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { 3320 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 3321 // once we support fp <-> gpr moves. 3322 3323 // This can only ever happen in the presence of f32 array types, 3324 // since otherwise we never run out of FPRs before running out 3325 // of GPRs. 3326 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3327 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3328 3329 if (ObjectVT == MVT::f32) { 3330 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) 3331 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, 3332 DAG.getConstant(32, dl, MVT::i32)); 3333 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 3334 } 3335 3336 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); 3337 } else { 3338 if (CallConv == CallingConv::Fast) 3339 ComputeArgOffset(); 3340 3341 needsLoad = true; 3342 } 3343 3344 // When passing an array of floats, the array occupies consecutive 3345 // space in the argument area; only round up to the next doubleword 3346 // at the end of the array. Otherwise, each float takes 8 bytes. 3347 if (CallConv != CallingConv::Fast || needsLoad) { 3348 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; 3349 ArgOffset += ArgSize; 3350 if (Flags.isInConsecutiveRegsLast()) 3351 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3352 } 3353 break; 3354 case MVT::v4f32: 3355 case MVT::v4i32: 3356 case MVT::v8i16: 3357 case MVT::v16i8: 3358 case MVT::v2f64: 3359 case MVT::v2i64: 3360 case MVT::v1i128: 3361 if (!Subtarget.hasQPX()) { 3362 // These can be scalar arguments or elements of a vector array type 3363 // passed directly. The latter are used to implement ELFv2 homogenous 3364 // vector aggregates. 3365 if (VR_idx != Num_VR_Regs) { 3366 unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? 3367 MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : 3368 MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3369 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3370 ++VR_idx; 3371 } else { 3372 if (CallConv == CallingConv::Fast) 3373 ComputeArgOffset(); 3374 3375 needsLoad = true; 3376 } 3377 if (CallConv != CallingConv::Fast || needsLoad) 3378 ArgOffset += 16; 3379 break; 3380 } // not QPX 3381 3382 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && 3383 "Invalid QPX parameter type"); 3384 /* fall through */ 3385 3386 case MVT::v4f64: 3387 case MVT::v4i1: 3388 // QPX vectors are treated like their scalar floating-point subregisters 3389 // (except that they're larger). 
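      // In terms of argument-area bookkeeping, a v4f32 takes 16 bytes and a
      // v4f64/v4i1 takes 32 bytes, matching the 16/32-byte figures in
      // CalculateStackSlotAlignment above.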
3390 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32; 3391 if (QFPR_idx != Num_QFPR_Regs) { 3392 const TargetRegisterClass *RC; 3393 switch (ObjectVT.getSimpleVT().SimpleTy) { 3394 case MVT::v4f64: RC = &PPC::QFRCRegClass; break; 3395 case MVT::v4f32: RC = &PPC::QSRCRegClass; break; 3396 default: RC = &PPC::QBRCRegClass; break; 3397 } 3398 3399 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); 3400 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3401 ++QFPR_idx; 3402 } else { 3403 if (CallConv == CallingConv::Fast) 3404 ComputeArgOffset(); 3405 needsLoad = true; 3406 } 3407 if (CallConv != CallingConv::Fast || needsLoad) 3408 ArgOffset += Sz; 3409 break; 3410 } 3411 3412 // We need to load the argument to a virtual register if we determined 3413 // above that we ran out of physical registers of the appropriate type. 3414 if (needsLoad) { 3415 if (ObjSize < ArgSize && !isLittleEndian) 3416 CurArgOffset += ArgSize - ObjSize; 3417 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); 3418 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3419 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 3420 false, false, false, 0); 3421 } 3422 3423 InVals.push_back(ArgVal); 3424 } 3425 3426 // Area that is at least reserved in the caller of this function. 3427 unsigned MinReservedArea; 3428 if (HasParameterArea) 3429 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); 3430 else 3431 MinReservedArea = LinkageSize; 3432 3433 // Set the size that is at least reserved in caller of this function. Tail 3434 // call optimized functions' reserved stack space needs to be aligned so that 3435 // taking the difference between two stack areas will result in an aligned 3436 // stack. 3437 MinReservedArea = 3438 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3439 FuncInfo->setMinReservedArea(MinReservedArea); 3440 3441 // If the function takes variable number of arguments, make a frame index for 3442 // the start of the first vararg value... for expansion of llvm.va_start. 3443 if (isVarArg) { 3444 int Depth = ArgOffset; 3445 3446 FuncInfo->setVarArgsFrameIndex( 3447 MFI->CreateFixedObject(PtrByteSize, Depth, true)); 3448 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3449 3450 // If this function is vararg, store any remaining integer argument regs 3451 // to their spots on the stack so that they may be loaded by dereferencing the 3452 // result of va_next.
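    // For example, if the named arguments consumed the first three doubleword
    // slots, GPR_idx starts at 3 and X6 through X10 are spilled to consecutive
    // slots beginning at the first vararg position.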
3453 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3454 GPR_idx < Num_GPR_Regs; ++GPR_idx) { 3455 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3456 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3457 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3458 MachinePointerInfo(), false, false, 0); 3459 MemOps.push_back(Store); 3460 // Increment the address by four for the next argument to store 3461 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); 3462 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3463 } 3464 } 3465 3466 if (!MemOps.empty()) 3467 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3468 3469 return Chain; 3470 } 3471 3472 SDValue 3473 PPCTargetLowering::LowerFormalArguments_Darwin( 3474 SDValue Chain, 3475 CallingConv::ID CallConv, bool isVarArg, 3476 const SmallVectorImpl<ISD::InputArg> 3477 &Ins, 3478 SDLoc dl, SelectionDAG &DAG, 3479 SmallVectorImpl<SDValue> &InVals) const { 3480 // TODO: add description of PPC stack frame format, or at least some docs. 3481 // 3482 MachineFunction &MF = DAG.getMachineFunction(); 3483 MachineFrameInfo *MFI = MF.getFrameInfo(); 3484 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3485 3486 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); 3487 bool isPPC64 = PtrVT == MVT::i64; 3488 // Potential tail calls could cause overwriting of argument stack slots. 3489 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3490 (CallConv == CallingConv::Fast)); 3491 unsigned PtrByteSize = isPPC64 ? 8 : 4; 3492 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3493 unsigned ArgOffset = LinkageSize; 3494 // Area that is at least reserved in caller of this function. 3495 unsigned MinReservedArea = ArgOffset; 3496 3497 static const MCPhysReg GPR_32[] = { // 32-bit registers. 3498 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3499 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3500 }; 3501 static const MCPhysReg GPR_64[] = { // 64-bit registers. 3502 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3503 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3504 }; 3505 static const MCPhysReg VR[] = { 3506 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3507 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3508 }; 3509 3510 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 3511 const unsigned Num_FPR_Regs = 13; 3512 const unsigned Num_VR_Regs = array_lengthof( VR); 3513 3514 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3515 3516 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 3517 3518 // In 32-bit non-varargs functions, the stack space for vectors is after the 3519 // stack space for non-vectors. We do not use this space unless we have 3520 // too many vectors to fit in registers, something that only occurs in 3521 // constructed examples:), but we have to walk the arglist to figure 3522 // that out...for the pathological case, compute VecArgOffset as the 3523 // start of the vector parameter area. Computing VecArgOffset is the 3524 // entire point of the following loop. 3525 unsigned VecArgOffset = ArgOffset; 3526 if (!isVarArg && !isPPC64) { 3527 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 3528 ++ArgNo) { 3529 EVT ObjectVT = Ins[ArgNo].VT; 3530 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3531 3532 if (Flags.isByVal()) { 3533 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 
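        // For example, with the 4-byte pointers used on this path, a 10-byte
        // struct gives ObjSize = 10 and ArgSize = 12.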
3534 unsigned ObjSize = Flags.getByValSize(); 3535 unsigned ArgSize = 3536 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3537 VecArgOffset += ArgSize; 3538 continue; 3539 } 3540 3541 switch(ObjectVT.getSimpleVT().SimpleTy) { 3542 default: llvm_unreachable("Unhandled argument type!"); 3543 case MVT::i1: 3544 case MVT::i32: 3545 case MVT::f32: 3546 VecArgOffset += 4; 3547 break; 3548 case MVT::i64: // PPC64 3549 case MVT::f64: 3550 // FIXME: We are guaranteed to be !isPPC64 at this point. 3551 // Does MVT::i64 apply? 3552 VecArgOffset += 8; 3553 break; 3554 case MVT::v4f32: 3555 case MVT::v4i32: 3556 case MVT::v8i16: 3557 case MVT::v16i8: 3558 // Nothing to do, we're only looking at Nonvector args here. 3559 break; 3560 } 3561 } 3562 } 3563 // We've found where the vector parameter area in memory is. Skip the 3564 // first 12 parameters; these don't use that memory. 3565 VecArgOffset = ((VecArgOffset+15)/16)*16; 3566 VecArgOffset += 12*16; 3567 3568 // Add DAG nodes to load the arguments or copy them out of registers. On 3569 // entry to a function on PPC, the arguments start after the linkage area, 3570 // although the first ones are often in registers. 3571 3572 SmallVector<SDValue, 8> MemOps; 3573 unsigned nAltivecParamsAtEnd = 0; 3574 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3575 unsigned CurArgIdx = 0; 3576 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3577 SDValue ArgVal; 3578 bool needsLoad = false; 3579 EVT ObjectVT = Ins[ArgNo].VT; 3580 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 3581 unsigned ArgSize = ObjSize; 3582 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3583 if (Ins[ArgNo].isOrigArg()) { 3584 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3585 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3586 } 3587 unsigned CurArgOffset = ArgOffset; 3588 3589 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 3590 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 3591 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 3592 if (isVarArg || isPPC64) { 3593 MinReservedArea = ((MinReservedArea+15)/16)*16; 3594 MinReservedArea += CalculateStackSlotSize(ObjectVT, 3595 Flags, 3596 PtrByteSize); 3597 } else nAltivecParamsAtEnd++; 3598 } else 3599 // Calculate min reserved area. 3600 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 3601 Flags, 3602 PtrByteSize); 3603 3604 // FIXME the codegen can be much improved in some cases. 3605 // We do not have to keep everything in memory. 3606 if (Flags.isByVal()) { 3607 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3608 3609 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3610 ObjSize = Flags.getByValSize(); 3611 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3612 // Objects of size 1 and 2 are right justified, everything else is 3613 // left justified. This means the memory address is adjusted forwards. 3614 if (ObjSize==1 || ObjSize==2) { 3615 CurArgOffset = CurArgOffset + (4 - ObjSize); 3616 } 3617 // The value of the object is its address. 
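      // For example, a 1-byte byval object whose slot starts at offset 24 is
      // addressed at 24 + (4 - 1) = 27 after the adjustment above.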
3618 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); 3619 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3620 InVals.push_back(FIN); 3621 if (ObjSize==1 || ObjSize==2) { 3622 if (GPR_idx != Num_GPR_Regs) { 3623 unsigned VReg; 3624 if (isPPC64) 3625 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3626 else 3627 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3628 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3629 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 3630 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 3631 MachinePointerInfo(&*FuncArg), 3632 ObjType, false, false, 0); 3633 MemOps.push_back(Store); 3634 ++GPR_idx; 3635 } 3636 3637 ArgOffset += PtrByteSize; 3638 3639 continue; 3640 } 3641 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3642 // Store whatever pieces of the object are in registers 3643 // to memory. ArgOffset will be the address of the beginning 3644 // of the object. 3645 if (GPR_idx != Num_GPR_Regs) { 3646 unsigned VReg; 3647 if (isPPC64) 3648 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3649 else 3650 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3651 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 3652 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3653 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3654 SDValue Store = 3655 DAG.getStore(Val.getValue(1), dl, Val, FIN, 3656 MachinePointerInfo(&*FuncArg, j), false, false, 0); 3657 MemOps.push_back(Store); 3658 ++GPR_idx; 3659 ArgOffset += PtrByteSize; 3660 } else { 3661 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 3662 break; 3663 } 3664 } 3665 continue; 3666 } 3667 3668 switch (ObjectVT.getSimpleVT().SimpleTy) { 3669 default: llvm_unreachable("Unhandled argument type!"); 3670 case MVT::i1: 3671 case MVT::i32: 3672 if (!isPPC64) { 3673 if (GPR_idx != Num_GPR_Regs) { 3674 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3675 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 3676 3677 if (ObjectVT == MVT::i1) 3678 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 3679 3680 ++GPR_idx; 3681 } else { 3682 needsLoad = true; 3683 ArgSize = PtrByteSize; 3684 } 3685 // All int arguments reserve stack space in the Darwin ABI. 3686 ArgOffset += PtrByteSize; 3687 break; 3688 } 3689 // FALLTHROUGH 3690 case MVT::i64: // PPC64 3691 if (GPR_idx != Num_GPR_Regs) { 3692 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3693 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3694 3695 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3696 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3697 // value to MVT::i64 and then truncate to the correct register size. 3698 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3699 3700 ++GPR_idx; 3701 } else { 3702 needsLoad = true; 3703 ArgSize = PtrByteSize; 3704 } 3705 // All int arguments reserve stack space in the Darwin ABI. 3706 ArgOffset += 8; 3707 break; 3708 3709 case MVT::f32: 3710 case MVT::f64: 3711 // Every 4 bytes of argument space consumes one of the GPRs available for 3712 // argument passing. 
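      // For example, on 32-bit targets an f64 that is passed in an FPR still
      // skips up to two GPRs below, keeping GPR_idx in step with ArgOffset.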
3713 if (GPR_idx != Num_GPR_Regs) { 3714 ++GPR_idx; 3715 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 3716 ++GPR_idx; 3717 } 3718 if (FPR_idx != Num_FPR_Regs) { 3719 unsigned VReg; 3720 3721 if (ObjectVT == MVT::f32) 3722 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 3723 else 3724 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 3725 3726 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3727 ++FPR_idx; 3728 } else { 3729 needsLoad = true; 3730 } 3731 3732 // All FP arguments reserve stack space in the Darwin ABI. 3733 ArgOffset += isPPC64 ? 8 : ObjSize; 3734 break; 3735 case MVT::v4f32: 3736 case MVT::v4i32: 3737 case MVT::v8i16: 3738 case MVT::v16i8: 3739 // Note that vector arguments in registers don't reserve stack space, 3740 // except in varargs functions. 3741 if (VR_idx != Num_VR_Regs) { 3742 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3743 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3744 if (isVarArg) { 3745 while ((ArgOffset % 16) != 0) { 3746 ArgOffset += PtrByteSize; 3747 if (GPR_idx != Num_GPR_Regs) 3748 GPR_idx++; 3749 } 3750 ArgOffset += 16; 3751 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 3752 } 3753 ++VR_idx; 3754 } else { 3755 if (!isVarArg && !isPPC64) { 3756 // Vectors go after all the nonvectors. 3757 CurArgOffset = VecArgOffset; 3758 VecArgOffset += 16; 3759 } else { 3760 // Vectors are aligned. 3761 ArgOffset = ((ArgOffset+15)/16)*16; 3762 CurArgOffset = ArgOffset; 3763 ArgOffset += 16; 3764 } 3765 needsLoad = true; 3766 } 3767 break; 3768 } 3769 3770 // We need to load the argument to a virtual register if we determined above 3771 // that we ran out of physical registers of the appropriate type. 3772 if (needsLoad) { 3773 int FI = MFI->CreateFixedObject(ObjSize, 3774 CurArgOffset + (ArgSize - ObjSize), 3775 isImmutable); 3776 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3777 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 3778 false, false, false, 0); 3779 } 3780 3781 InVals.push_back(ArgVal); 3782 } 3783 3784 // Allow for Altivec parameters at the end, if needed. 3785 if (nAltivecParamsAtEnd) { 3786 MinReservedArea = ((MinReservedArea+15)/16)*16; 3787 MinReservedArea += 16*nAltivecParamsAtEnd; 3788 } 3789 3790 // Area that is at least reserved in the caller of this function. 3791 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); 3792 3793 // Set the size that is at least reserved in caller of this function. Tail 3794 // call optimized functions' reserved stack space needs to be aligned so that 3795 // taking the difference between two stack areas will result in an aligned 3796 // stack. 3797 MinReservedArea = 3798 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3799 FuncInfo->setMinReservedArea(MinReservedArea); 3800 3801 // If the function takes variable number of arguments, make a frame index for 3802 // the start of the first vararg value... for expansion of llvm.va_start. 3803 if (isVarArg) { 3804 int Depth = ArgOffset; 3805 3806 FuncInfo->setVarArgsFrameIndex( 3807 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 3808 Depth, true)); 3809 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3810 3811 // If this function is vararg, store any remaining integer argument regs 3812 // to their spots on the stack so that they may be loaded by deferencing the 3813 // result of va_next. 
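    // Unlike the 64-bit SVR4 path above, GPR_idx here simply continues from
    // wherever the named arguments left off.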
3814 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 3815 unsigned VReg; 3816 3817 if (isPPC64) 3818 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3819 else 3820 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3821 3822 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3823 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3824 MachinePointerInfo(), false, false, 0); 3825 MemOps.push_back(Store); 3826 // Increment the address by four for the next argument to store 3827 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 3828 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3829 } 3830 } 3831 3832 if (!MemOps.empty()) 3833 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3834 3835 return Chain; 3836 } 3837 3838 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 3839 /// adjusted to accommodate the arguments for the tailcall. 3840 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 3841 unsigned ParamSize) { 3842 3843 if (!isTailCall) return 0; 3844 3845 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 3846 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 3847 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 3848 // Remember only if the new adjustment is bigger. 3849 if (SPDiff < FI->getTailCallSPDelta()) 3850 FI->setTailCallSPDelta(SPDiff); 3851 3852 return SPDiff; 3853 } 3854 3855 static bool isFunctionGlobalAddress(SDValue Callee); 3856 3857 static bool 3858 resideInSameModule(SDValue Callee, Reloc::Model RelMod) { 3859 // If !G, Callee can be an external symbol. 3860 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 3861 if (!G) return false; 3862 3863 const GlobalValue *GV = G->getGlobal(); 3864 3865 if (GV->isDeclaration()) return false; 3866 3867 switch(GV->getLinkage()) { 3868 default: llvm_unreachable("unknown linkage type"); 3869 case GlobalValue::AvailableExternallyLinkage: 3870 case GlobalValue::ExternalWeakLinkage: 3871 return false; 3872 3873 // A callee with weak linkage is allowed if it has hidden or protected 3874 // visibility. 3875 case GlobalValue::LinkOnceAnyLinkage: 3876 case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions 3877 case GlobalValue::WeakAnyLinkage: 3878 case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation 3879 if (GV->hasDefaultVisibility()) 3880 return false; 3881 3882 case GlobalValue::ExternalLinkage: 3883 case GlobalValue::InternalLinkage: 3884 case GlobalValue::PrivateLinkage: 3885 break; 3886 } 3887 3888 // With '-fPIC', calling a default-visibility function requires a 'nop' to be 3889 // inserted after the call, whether or not the function resides in the same 3890 // module, so we treat it as being in a different module.
3891 if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility()) 3892 return false; 3893 3894 return true; 3895 } 3896 3897 static bool 3898 needStackSlotPassParameters(const PPCSubtarget &Subtarget, 3899 const SmallVectorImpl<ISD::OutputArg> &Outs) { 3900 assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); 3901 3902 const unsigned PtrByteSize = 8; 3903 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3904 3905 static const MCPhysReg GPR[] = { 3906 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3907 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3908 }; 3909 static const MCPhysReg VR[] = { 3910 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3911 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3912 }; 3913 3914 const unsigned NumGPRs = array_lengthof(GPR); 3915 const unsigned NumFPRs = 13; 3916 const unsigned NumVRs = array_lengthof(VR); 3917 const unsigned ParamAreaSize = NumGPRs * PtrByteSize; 3918 3919 unsigned NumBytes = LinkageSize; 3920 unsigned AvailableFPRs = NumFPRs; 3921 unsigned AvailableVRs = NumVRs; 3922 3923 for (const ISD::OutputArg& Param : Outs) { 3924 if (Param.Flags.isNest()) continue; 3925 3926 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, 3927 PtrByteSize, LinkageSize, ParamAreaSize, 3928 NumBytes, AvailableFPRs, AvailableVRs, 3929 Subtarget.hasQPX())) 3930 return true; 3931 } 3932 return false; 3933 } 3934 3935 static bool 3936 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) { 3937 if (CS->arg_size() != CallerFn->getArgumentList().size()) 3938 return false; 3939 3940 ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin(); 3941 ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end(); 3942 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); 3943 3944 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) { 3945 const Value* CalleeArg = *CalleeArgIter; 3946 const Value* CallerArg = &(*CallerArgIter); 3947 if (CalleeArg == CallerArg) 3948 continue; 3949 3950 // e.g. @caller([4 x i64] %a, [4 x i64] %b) { 3951 // tail call @callee([4 x i64] undef, [4 x i64] %b) 3952 // } 3953 // The first argument of the callee is undef and has the same type as the caller's. 3954 if (CalleeArg->getType() == CallerArg->getType() && 3955 isa<UndefValue>(CalleeArg)) 3956 continue; 3957 3958 return false; 3959 } 3960 3961 return true; 3962 } 3963 3964 bool 3965 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( 3966 SDValue Callee, 3967 CallingConv::ID CalleeCC, 3968 ImmutableCallSite *CS, 3969 bool isVarArg, 3970 const SmallVectorImpl<ISD::OutputArg> &Outs, 3971 const SmallVectorImpl<ISD::InputArg> &Ins, 3972 SelectionDAG& DAG) const { 3973 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; 3974 3975 if (DisableSCO && !TailCallOpt) return false; 3976 3977 // Variadic argument functions are not supported. 3978 if (isVarArg) return false; 3979 3980 MachineFunction &MF = DAG.getMachineFunction(); 3981 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 3982 3983 // Tail or sibling call optimization (TCO/SCO) requires the callee and caller 3984 // to use the same calling convention. 3985 if (CallerCC != CalleeCC) return false; 3986 3987 // SCO only supports the C and fast calling conventions. 3988 if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C) 3989 return false; 3990 3991 // Functions containing by val parameters are not supported.
3992 if (std::any_of(Ins.begin(), Ins.end(), 3993 [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); })) 3994 return false; 3995 3996 // No TCO/SCO on indirect calls because the caller has to restore its TOC 3997 if (!isFunctionGlobalAddress(Callee) && 3998 !isa<ExternalSymbolSDNode>(Callee)) 3999 return false; 4000 4001 // Check if Callee resides in the same module, because for now, the PPC64 SVR4 4002 // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in 4003 // another module. 4004 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 4005 if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel())) 4006 return false; 4007 4008 // TCO allows altering the callee's ABI, so we don't have to check further. 4009 if (CalleeCC == CallingConv::Fast && TailCallOpt) 4010 return true; 4011 4012 if (DisableSCO) return false; 4013 4014 // If the callee uses the same argument list as the caller, we can apply SCO 4015 // here. Otherwise, we need to check whether the callee needs stack slots for 4016 // passing arguments. 4017 if (!hasSameArgumentList(MF.getFunction(), CS) && 4018 needStackSlotPassParameters(Subtarget, Outs)) { 4019 return false; 4020 } 4021 4022 return true; 4023 } 4024 4025 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 4026 /// for tail call optimization. Targets which want to do tail call 4027 /// optimization should implement this function. 4028 bool 4029 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 4030 CallingConv::ID CalleeCC, 4031 bool isVarArg, 4032 const SmallVectorImpl<ISD::InputArg> &Ins, 4033 SelectionDAG& DAG) const { 4034 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 4035 return false; 4036 4037 // Variable argument functions are not supported. 4038 if (isVarArg) 4039 return false; 4040 4041 MachineFunction &MF = DAG.getMachineFunction(); 4042 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 4043 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 4044 // Functions containing by val parameters are not supported. 4045 for (unsigned i = 0; i != Ins.size(); i++) { 4046 ISD::ArgFlagsTy Flags = Ins[i].Flags; 4047 if (Flags.isByVal()) return false; 4048 } 4049 4050 // Non-PIC/GOT tail calls are supported. 4051 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 4052 return true; 4053 4054 // At the moment we can only do local tail calls (in same module, hidden 4055 // or protected) if we are generating PIC. 4056 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4057 return G->getGlobal()->hasHiddenVisibility() 4058 || G->getGlobal()->hasProtectedVisibility(); 4059 } 4060 4061 return false; 4062 } 4063 4064 /// isBLACompatibleAddress - Return the immediate to use if the specified 4065 /// 32-bit value is representable in the immediate field of a BxA instruction. 4066 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 4067 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 4068 if (!C) return nullptr; 4069 4070 int Addr = C->getZExtValue(); 4071 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 4072 SignExtend32<26>(Addr) != Addr) 4073 return nullptr; // Top 6 bits have to be sext of immediate.
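  // For example, an address of 0x0FF0 passes both checks and is returned below
  // as the immediate 0x3FC (the address shifted right by two).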
4074 4075 return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op), 4076 DAG.getTargetLoweringInfo().getPointerTy( 4077 DAG.getDataLayout())).getNode(); 4078 } 4079 4080 namespace { 4081 4082 struct TailCallArgumentInfo { 4083 SDValue Arg; 4084 SDValue FrameIdxOp; 4085 int FrameIdx; 4086 4087 TailCallArgumentInfo() : FrameIdx(0) {} 4088 }; 4089 } 4090 4091 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 4092 static void 4093 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 4094 SDValue Chain, 4095 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 4096 SmallVectorImpl<SDValue> &MemOpChains, 4097 SDLoc dl) { 4098 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 4099 SDValue Arg = TailCallArgs[i].Arg; 4100 SDValue FIN = TailCallArgs[i].FrameIdxOp; 4101 int FI = TailCallArgs[i].FrameIdx; 4102 // Store relative to framepointer. 4103 MemOpChains.push_back(DAG.getStore( 4104 Chain, dl, Arg, FIN, 4105 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, 4106 false, 0)); 4107 } 4108 } 4109 4110 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 4111 /// the appropriate stack slot for the tail call optimized function call. 4112 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 4113 MachineFunction &MF, 4114 SDValue Chain, 4115 SDValue OldRetAddr, 4116 SDValue OldFP, 4117 int SPDiff, 4118 bool isPPC64, 4119 bool isDarwinABI, 4120 SDLoc dl) { 4121 if (SPDiff) { 4122 // Calculate the new stack slot for the return address. 4123 int SlotSize = isPPC64 ? 8 : 4; 4124 const PPCFrameLowering *FL = 4125 MF.getSubtarget<PPCSubtarget>().getFrameLowering(); 4126 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); 4127 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 4128 NewRetAddrLoc, true); 4129 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 4130 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 4131 Chain = DAG.getStore( 4132 Chain, dl, OldRetAddr, NewRetAddrFrIdx, 4133 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr), 4134 false, false, 0); 4135 4136 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 4137 // slot as the FP is never overwritten. 4138 if (isDarwinABI) { 4139 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); 4140 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 4141 true); 4142 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 4143 Chain = DAG.getStore( 4144 Chain, dl, OldFP, NewFramePtrIdx, 4145 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx), 4146 false, false, 0); 4147 } 4148 } 4149 return Chain; 4150 } 4151 4152 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 4153 /// the position of the argument. 4154 static void 4155 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 4156 SDValue Arg, int SPDiff, unsigned ArgOffset, 4157 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 4158 int Offset = ArgOffset + SPDiff; 4159 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 4160 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 4161 EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; 4162 SDValue FIN = DAG.getFrameIndex(FI, VT); 4163 TailCallArgumentInfo Info; 4164 Info.Arg = Arg; 4165 Info.FrameIdxOp = FIN; 4166 Info.FrameIdx = FI; 4167 TailCallArguments.push_back(Info); 4168 } 4169 4170 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 4171 /// stack slot. Returns the chain as result and the loaded frame pointers in 4172 /// LROpOut/FPOpout. Used when tail calling. 4173 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 4174 int SPDiff, 4175 SDValue Chain, 4176 SDValue &LROpOut, 4177 SDValue &FPOpOut, 4178 bool isDarwinABI, 4179 SDLoc dl) const { 4180 if (SPDiff) { 4181 // Load the LR and FP stack slot for later adjusting. 4182 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 4183 LROpOut = getReturnAddrFrameIndex(DAG); 4184 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 4185 false, false, false, 0); 4186 Chain = SDValue(LROpOut.getNode(), 1); 4187 4188 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 4189 // slot as the FP is never overwritten. 4190 if (isDarwinABI) { 4191 FPOpOut = getFramePointerFrameIndex(DAG); 4192 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 4193 false, false, false, 0); 4194 Chain = SDValue(FPOpOut.getNode(), 1); 4195 } 4196 } 4197 return Chain; 4198 } 4199 4200 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 4201 /// by "Src" to address "Dst" of size "Size". Alignment information is 4202 /// specified by the specific parameter attribute. The copy will be passed as 4203 /// a byval function parameter. 4204 /// Sometimes what we are copying is the end of a larger object, the part that 4205 /// does not fit in registers. 4206 static SDValue 4207 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 4208 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 4209 SDLoc dl) { 4210 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); 4211 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 4212 false, false, false, MachinePointerInfo(), 4213 MachinePointerInfo()); 4214 } 4215 4216 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 4217 /// tail calls. 4218 static void 4219 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 4220 SDValue Arg, SDValue PtrOff, int SPDiff, 4221 unsigned ArgOffset, bool isPPC64, bool isTailCall, 4222 bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 4223 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, 4224 SDLoc dl) { 4225 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4226 if (!isTailCall) { 4227 if (isVector) { 4228 SDValue StackPtr; 4229 if (isPPC64) 4230 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4231 else 4232 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4233 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4234 DAG.getConstant(ArgOffset, dl, PtrVT)); 4235 } 4236 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 4237 MachinePointerInfo(), false, false, 0)); 4238 // Calculate and remember argument location. 
4239 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 4240 TailCallArguments); 4241 } 4242 4243 static 4244 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 4245 SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 4246 SDValue LROp, SDValue FPOp, bool isDarwinABI, 4247 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 4248 MachineFunction &MF = DAG.getMachineFunction(); 4249 4250 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 4251 // might overwrite each other in case of tail call optimization. 4252 SmallVector<SDValue, 8> MemOpChains2; 4253 // Do not flag preceding copytoreg stuff together with the following stuff. 4254 InFlag = SDValue(); 4255 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 4256 MemOpChains2, dl); 4257 if (!MemOpChains2.empty()) 4258 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 4259 4260 // Store the return address to the appropriate stack slot. 4261 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 4262 isPPC64, isDarwinABI, dl); 4263 4264 // Emit callseq_end just before tailcall node. 4265 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4266 DAG.getIntPtrConstant(0, dl, true), InFlag, dl); 4267 InFlag = Chain.getValue(1); 4268 } 4269 4270 // Is this global address that of a function that can be called by name? (as 4271 // opposed to something that must hold a descriptor for an indirect call). 4272 static bool isFunctionGlobalAddress(SDValue Callee) { 4273 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4274 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 4275 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 4276 return false; 4277 4278 return G->getGlobal()->getValueType()->isFunctionTy(); 4279 } 4280 4281 return false; 4282 } 4283 4284 static 4285 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 4286 SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff, 4287 bool isTailCall, bool IsPatchPoint, bool hasNest, 4288 SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, 4289 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 4290 ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { 4291 4292 bool isPPC64 = Subtarget.isPPC64(); 4293 bool isSVR4ABI = Subtarget.isSVR4ABI(); 4294 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4295 4296 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4297 NodeTys.push_back(MVT::Other); // Returns a chain 4298 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 4299 4300 unsigned CallOpc = PPCISD::CALL; 4301 4302 bool needIndirectCall = true; 4303 if (!isSVR4ABI || !isPPC64) 4304 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 4305 // If this is an absolute destination address, use the munged value. 4306 Callee = SDValue(Dest, 0); 4307 needIndirectCall = false; 4308 } 4309 4310 if (isFunctionGlobalAddress(Callee)) { 4311 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 4312 // A call to a TLS address is actually an indirect call to a 4313 // thread-specific pointer. 
4314 unsigned OpFlags = 0; 4315 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 4316 (Subtarget.getTargetTriple().isMacOSX() && 4317 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 4318 !G->getGlobal()->isStrongDefinitionForLinker()) || 4319 (Subtarget.isTargetELF() && !isPPC64 && 4320 !G->getGlobal()->hasLocalLinkage() && 4321 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 4322 // PC-relative references to external symbols should go through $stub, 4323 // unless we're building with the leopard linker or later, which 4324 // automatically synthesizes these stubs. 4325 OpFlags = PPCII::MO_PLT_OR_STUB; 4326 } 4327 4328 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 4329 // every direct call is) turn it into a TargetGlobalAddress / 4330 // TargetExternalSymbol node so that legalize doesn't hack it. 4331 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 4332 Callee.getValueType(), 0, OpFlags); 4333 needIndirectCall = false; 4334 } 4335 4336 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4337 unsigned char OpFlags = 0; 4338 4339 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 4340 (Subtarget.getTargetTriple().isMacOSX() && 4341 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) || 4342 (Subtarget.isTargetELF() && !isPPC64 && 4343 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 4344 // PC-relative references to external symbols should go through $stub, 4345 // unless we're building with the leopard linker or later, which 4346 // automatically synthesizes these stubs. 4347 OpFlags = PPCII::MO_PLT_OR_STUB; 4348 } 4349 4350 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 4351 OpFlags); 4352 needIndirectCall = false; 4353 } 4354 4355 if (IsPatchPoint) { 4356 // We'll form an invalid direct call when lowering a patchpoint; the full 4357 // sequence for an indirect call is complicated, and many of the 4358 // instructions introduced might have side effects (and, thus, can't be 4359 // removed later). The call itself will be removed as soon as the 4360 // argument/return lowering is complete, so the fact that it has the wrong 4361 // kind of operands should not really matter. 4362 needIndirectCall = false; 4363 } 4364 4365 if (needIndirectCall) { 4366 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 4367 // to do the call, we can't use PPCISD::CALL. 4368 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 4369 4370 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 4371 // Function pointers in the 64-bit SVR4 ABI do not point to the function 4372 // entry point, but to the function descriptor (the function entry point 4373 // address is part of the function descriptor though). 4374 // The function descriptor is a three doubleword structure with the 4375 // following fields: function entry point, TOC base address and 4376 // environment pointer. 4377 // Thus for a call through a function pointer, the following actions need 4378 // to be performed: 4379 // 1. Save the TOC of the caller in the TOC save area of its stack 4380 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 4381 // 2. Load the address of the function entry point from the function 4382 // descriptor. 4383 // 3. Load the TOC of the callee from the function descriptor into r2. 4384 // 4. Load the environment pointer from the function descriptor into 4385 // r11. 4386 // 5. Branch to the function entry point address. 4387 // 6. 
On return of the callee, the TOC of the caller needs to be 4388 // restored (this is done in FinishCall()). 4389 // 4390 // The loads are scheduled at the beginning of the call sequence, and the 4391 // register copies are flagged together to ensure that no other 4392 // operations can be scheduled in between. E.g. without flagging the 4393 // copies together, a TOC access in the caller could be scheduled between 4394 // the assignment of the callee TOC and the branch to the callee, which 4395 // results in the TOC access going through the TOC of the callee instead 4396 // of going through the TOC of the caller, which leads to incorrect code. 4397 4398 // Load the address of the function entry point from the function 4399 // descriptor. 4400 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); 4401 if (LDChain.getValueType() == MVT::Glue) 4402 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); 4403 4404 bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors(); 4405 4406 MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr); 4407 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, 4408 false, false, LoadsInv, 8); 4409 4410 // Load environment pointer into r11. 4411 SDValue PtrOff = DAG.getIntPtrConstant(16, dl); 4412 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 4413 SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, 4414 MPI.getWithOffset(16), false, false, 4415 LoadsInv, 8); 4416 4417 SDValue TOCOff = DAG.getIntPtrConstant(8, dl); 4418 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 4419 SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, 4420 MPI.getWithOffset(8), false, false, 4421 LoadsInv, 8); 4422 4423 setUsesTOCBasePtr(DAG); 4424 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, 4425 InFlag); 4426 Chain = TOCVal.getValue(0); 4427 InFlag = TOCVal.getValue(1); 4428 4429 // If the function call has an explicit 'nest' parameter, it takes the 4430 // place of the environment pointer. 4431 if (!hasNest) { 4432 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 4433 InFlag); 4434 4435 Chain = EnvVal.getValue(0); 4436 InFlag = EnvVal.getValue(1); 4437 } 4438 4439 MTCTROps[0] = Chain; 4440 MTCTROps[1] = LoadFuncPtr; 4441 MTCTROps[2] = InFlag; 4442 } 4443 4444 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 4445 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 4446 InFlag = Chain.getValue(1); 4447 4448 NodeTys.clear(); 4449 NodeTys.push_back(MVT::Other); 4450 NodeTys.push_back(MVT::Glue); 4451 Ops.push_back(Chain); 4452 CallOpc = PPCISD::BCTRL; 4453 Callee.setNode(nullptr); 4454 // Add use of X11 (holding environment pointer) 4455 if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) 4456 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 4457 // Add CTR register as callee so a bctr can be emitted later. 4458 if (isTailCall) 4459 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 4460 } 4461 4462 // If this is a direct call, pass the chain and the callee. 4463 if (Callee.getNode()) { 4464 Ops.push_back(Chain); 4465 Ops.push_back(Callee); 4466 } 4467 // If this is a tail call add stack pointer delta. 4468 if (isTailCall) 4469 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); 4470 4471 // Add argument registers to the end of the list so that they are known live 4472 // into the call. 
4473 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 4474 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 4475 RegsToPass[i].second.getValueType())); 4476 4477 // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live 4478 // into the call. 4479 if (isSVR4ABI && isPPC64 && !IsPatchPoint) { 4480 setUsesTOCBasePtr(DAG); 4481 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 4482 } 4483 4484 return CallOpc; 4485 } 4486 4487 static 4488 bool isLocalCall(const SDValue &Callee) 4489 { 4490 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4491 return G->getGlobal()->isStrongDefinitionForLinker(); 4492 return false; 4493 } 4494 4495 SDValue 4496 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 4497 CallingConv::ID CallConv, bool isVarArg, 4498 const SmallVectorImpl<ISD::InputArg> &Ins, 4499 SDLoc dl, SelectionDAG &DAG, 4500 SmallVectorImpl<SDValue> &InVals) const { 4501 4502 SmallVector<CCValAssign, 16> RVLocs; 4503 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 4504 *DAG.getContext()); 4505 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 4506 4507 // Copy all of the result registers out of their specified physreg. 4508 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 4509 CCValAssign &VA = RVLocs[i]; 4510 assert(VA.isRegLoc() && "Can only return in registers!"); 4511 4512 SDValue Val = DAG.getCopyFromReg(Chain, dl, 4513 VA.getLocReg(), VA.getLocVT(), InFlag); 4514 Chain = Val.getValue(1); 4515 InFlag = Val.getValue(2); 4516 4517 switch (VA.getLocInfo()) { 4518 default: llvm_unreachable("Unknown loc info!"); 4519 case CCValAssign::Full: break; 4520 case CCValAssign::AExt: 4521 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4522 break; 4523 case CCValAssign::ZExt: 4524 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 4525 DAG.getValueType(VA.getValVT())); 4526 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4527 break; 4528 case CCValAssign::SExt: 4529 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 4530 DAG.getValueType(VA.getValVT())); 4531 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4532 break; 4533 } 4534 4535 InVals.push_back(Val); 4536 } 4537 4538 return Chain; 4539 } 4540 4541 SDValue 4542 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, 4543 bool isTailCall, bool isVarArg, bool IsPatchPoint, 4544 bool hasNest, SelectionDAG &DAG, 4545 SmallVector<std::pair<unsigned, SDValue>, 8> 4546 &RegsToPass, 4547 SDValue InFlag, SDValue Chain, 4548 SDValue CallSeqStart, SDValue &Callee, 4549 int SPDiff, unsigned NumBytes, 4550 const SmallVectorImpl<ISD::InputArg> &Ins, 4551 SmallVectorImpl<SDValue> &InVals, 4552 ImmutableCallSite *CS) const { 4553 4554 std::vector<EVT> NodeTys; 4555 SmallVector<SDValue, 8> Ops; 4556 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, 4557 SPDiff, isTailCall, IsPatchPoint, hasNest, 4558 RegsToPass, Ops, NodeTys, CS, Subtarget); 4559 4560 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 4561 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 4562 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 4563 4564 // When performing tail call optimization the callee pops its arguments off 4565 // the stack. Account for this here so these bytes can be pushed back on in 4566 // PPCFrameLowering::eliminateCallFramePseudoInstr. 4567 int BytesCalleePops = 4568 (CallConv == CallingConv::Fast && 4569 getTargetMachine().Options.GuaranteedTailCallOpt) ? 
NumBytes : 0; 4570 4571 // Add a register mask operand representing the call-preserved registers. 4572 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4573 const uint32_t *Mask = 4574 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); 4575 assert(Mask && "Missing call preserved mask for calling convention"); 4576 Ops.push_back(DAG.getRegisterMask(Mask)); 4577 4578 if (InFlag.getNode()) 4579 Ops.push_back(InFlag); 4580 4581 // Emit tail call. 4582 if (isTailCall) { 4583 assert(((Callee.getOpcode() == ISD::Register && 4584 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 4585 Callee.getOpcode() == ISD::TargetExternalSymbol || 4586 Callee.getOpcode() == ISD::TargetGlobalAddress || 4587 isa<ConstantSDNode>(Callee)) && 4588 "Expecting an global address, external symbol, absolute value or register"); 4589 4590 DAG.getMachineFunction().getFrameInfo()->setHasTailCall(); 4591 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); 4592 } 4593 4594 // Add a NOP immediately after the branch instruction when using the 64-bit 4595 // SVR4 ABI. At link time, if caller and callee are in a different module and 4596 // thus have a different TOC, the call will be replaced with a call to a stub 4597 // function which saves the current TOC, loads the TOC of the callee and 4598 // branches to the callee. The NOP will be replaced with a load instruction 4599 // which restores the TOC of the caller from the TOC save slot of the current 4600 // stack frame. If caller and callee belong to the same module (and have the 4601 // same TOC), the NOP will remain unchanged. 4602 4603 if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && 4604 !IsPatchPoint) { 4605 if (CallOpc == PPCISD::BCTRL) { 4606 // This is a call through a function pointer. 4607 // Restore the caller TOC from the save area into R2. 4608 // See PrepareCall() for more information about calls through function 4609 // pointers in the 64-bit SVR4 ABI. 4610 // We are using a target-specific load with r2 hard coded, because the 4611 // result of a target-independent load would never go directly into r2, 4612 // since r2 is a reserved register (which prevents the register allocator 4613 // from allocating it), resulting in an additional register being 4614 // allocated and an unnecessary move instruction being generated. 4615 CallOpc = PPCISD::BCTRL_LOAD_TOC; 4616 4617 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4618 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); 4619 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 4620 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 4621 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); 4622 4623 // The address needs to go after the chain input but before the flag (or 4624 // any other variadic arguments). 4625 Ops.insert(std::next(Ops.begin()), AddTOC); 4626 } else if ((CallOpc == PPCISD::CALL) && 4627 (!isLocalCall(Callee) || 4628 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) 4629 // Otherwise insert NOP for non-local calls. 
4630 CallOpc = PPCISD::CALL_NOP; 4631 } 4632 4633 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 4634 InFlag = Chain.getValue(1); 4635 4636 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4637 DAG.getIntPtrConstant(BytesCalleePops, dl, true), 4638 InFlag, dl); 4639 if (!Ins.empty()) 4640 InFlag = Chain.getValue(1); 4641 4642 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 4643 Ins, dl, DAG, InVals); 4644 } 4645 4646 SDValue 4647 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 4648 SmallVectorImpl<SDValue> &InVals) const { 4649 SelectionDAG &DAG = CLI.DAG; 4650 SDLoc &dl = CLI.DL; 4651 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4652 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4653 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4654 SDValue Chain = CLI.Chain; 4655 SDValue Callee = CLI.Callee; 4656 bool &isTailCall = CLI.IsTailCall; 4657 CallingConv::ID CallConv = CLI.CallConv; 4658 bool isVarArg = CLI.IsVarArg; 4659 bool IsPatchPoint = CLI.IsPatchPoint; 4660 ImmutableCallSite *CS = CLI.CS; 4661 4662 if (isTailCall) { 4663 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) 4664 isTailCall = 4665 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, 4666 isVarArg, Outs, Ins, DAG); 4667 else 4668 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 4669 Ins, DAG); 4670 if (isTailCall) { 4671 ++NumTailCalls; 4672 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 4673 ++NumSiblingCalls; 4674 4675 assert(isa<GlobalAddressSDNode>(Callee) && 4676 "Callee should be an llvm::Function object."); 4677 DEBUG( 4678 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); 4679 const unsigned Width = 80 - strlen("TCO caller: ") 4680 - strlen(", callee linkage: 0, 0"); 4681 dbgs() << "TCO caller: " 4682 << left_justify(DAG.getMachineFunction().getName(), Width) 4683 << ", callee linkage: " 4684 << GV->getVisibility() << ", " << GV->getLinkage() << "\n" 4685 ); 4686 } 4687 } 4688 4689 if (!isTailCall && CS && CS->isMustTailCall()) 4690 report_fatal_error("failed to perform tail call elimination on a call " 4691 "site marked musttail"); 4692 4693 if (Subtarget.isSVR4ABI()) { 4694 if (Subtarget.isPPC64()) 4695 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 4696 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 4697 dl, DAG, InVals, CS); 4698 else 4699 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 4700 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 4701 dl, DAG, InVals, CS); 4702 } 4703 4704 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 4705 isTailCall, IsPatchPoint, Outs, OutVals, Ins, 4706 dl, DAG, InVals, CS); 4707 } 4708 4709 SDValue 4710 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, 4711 CallingConv::ID CallConv, bool isVarArg, 4712 bool isTailCall, bool IsPatchPoint, 4713 const SmallVectorImpl<ISD::OutputArg> &Outs, 4714 const SmallVectorImpl<SDValue> &OutVals, 4715 const SmallVectorImpl<ISD::InputArg> &Ins, 4716 SDLoc dl, SelectionDAG &DAG, 4717 SmallVectorImpl<SDValue> &InVals, 4718 ImmutableCallSite *CS) const { 4719 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 4720 // of the 32-bit SVR4 ABI stack frame layout. 
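// In outline, the code below assigns locations to the outgoing arguments
// (CC_PPC32_SVR4 for fixed arguments, CC_PPC32_SVR4_VarArg for the variable
// part of a vararg call), copies by-value aggregates into the caller's frame
// outside the CALLSEQ_START..END region, emits CopyToReg nodes for register
// arguments and stores relative to R1 for stack arguments, sets or clears CR
// bit 6 for vararg calls depending on whether floating-point arguments were
// seen, and finally emits the call via FinishCall().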
4721 4722 assert((CallConv == CallingConv::C || 4723 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 4724 4725 unsigned PtrByteSize = 4; 4726 4727 MachineFunction &MF = DAG.getMachineFunction(); 4728 4729 // Mark this function as potentially containing a function that contains a 4730 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4731 // and restoring the callers stack pointer in this functions epilog. This is 4732 // done because by tail calling the called function might overwrite the value 4733 // in this function's (MF) stack pointer stack slot 0(SP). 4734 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4735 CallConv == CallingConv::Fast) 4736 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4737 4738 // Count how many bytes are to be pushed on the stack, including the linkage 4739 // area, parameter list area and the part of the local variable space which 4740 // contains copies of aggregates which are passed by value. 4741 4742 // Assign locations to all of the outgoing arguments. 4743 SmallVector<CCValAssign, 16> ArgLocs; 4744 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 4745 *DAG.getContext()); 4746 4747 // Reserve space for the linkage area on the stack. 4748 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), 4749 PtrByteSize); 4750 4751 if (isVarArg) { 4752 // Handle fixed and variable vector arguments differently. 4753 // Fixed vector arguments go into registers as long as registers are 4754 // available. Variable vector arguments always go into memory. 4755 unsigned NumArgs = Outs.size(); 4756 4757 for (unsigned i = 0; i != NumArgs; ++i) { 4758 MVT ArgVT = Outs[i].VT; 4759 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4760 bool Result; 4761 4762 if (Outs[i].IsFixed) { 4763 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 4764 CCInfo); 4765 } else { 4766 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 4767 ArgFlags, CCInfo); 4768 } 4769 4770 if (Result) { 4771 #ifndef NDEBUG 4772 errs() << "Call operand #" << i << " has unhandled type " 4773 << EVT(ArgVT).getEVTString() << "\n"; 4774 #endif 4775 llvm_unreachable(nullptr); 4776 } 4777 } 4778 } else { 4779 // All arguments are treated the same. 4780 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 4781 } 4782 4783 // Assign locations to all of the outgoing aggregate by value arguments. 4784 SmallVector<CCValAssign, 16> ByValArgLocs; 4785 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4786 ByValArgLocs, *DAG.getContext()); 4787 4788 // Reserve stack space for the allocations in CCInfo. 4789 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 4790 4791 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 4792 4793 // Size of the linkage area, parameter list area and the part of the local 4794 // space variable where copies of aggregates which are passed by value are 4795 // stored. 4796 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 4797 4798 // Calculate by how many bytes the stack has to be adjusted in case of tail 4799 // call optimization. 4800 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4801 4802 // Adjust the stack pointer for the new arguments... 
4803 // These operations are automatically eliminated by the prolog/epilog pass 4804 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4805 dl); 4806 SDValue CallSeqStart = Chain; 4807 4808 // Load the return address and frame pointer so it can be moved somewhere else 4809 // later. 4810 SDValue LROp, FPOp; 4811 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, 4812 dl); 4813 4814 // Set up a copy of the stack pointer for use loading and storing any 4815 // arguments that may not fit in the registers available for argument 4816 // passing. 4817 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4818 4819 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4820 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4821 SmallVector<SDValue, 8> MemOpChains; 4822 4823 bool seenFloatArg = false; 4824 // Walk the register/memloc assignments, inserting copies/loads. 4825 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 4826 i != e; 4827 ++i) { 4828 CCValAssign &VA = ArgLocs[i]; 4829 SDValue Arg = OutVals[i]; 4830 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4831 4832 if (Flags.isByVal()) { 4833 // Argument is an aggregate which is passed by value, thus we need to 4834 // create a copy of it in the local variable space of the current stack 4835 // frame (which is the stack frame of the caller) and pass the address of 4836 // this copy to the callee. 4837 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 4838 CCValAssign &ByValVA = ByValArgLocs[j++]; 4839 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 4840 4841 // Memory reserved in the local variable space of the callers stack frame. 4842 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 4843 4844 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4845 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4846 StackPtr, PtrOff); 4847 4848 // Create a copy of the argument in the local area of the current 4849 // stack frame. 4850 SDValue MemcpyCall = 4851 CreateCopyOfByValArgument(Arg, PtrOff, 4852 CallSeqStart.getNode()->getOperand(0), 4853 Flags, DAG, dl); 4854 4855 // This must go outside the CALLSEQ_START..END. 4856 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4857 CallSeqStart.getNode()->getOperand(1), 4858 SDLoc(MemcpyCall)); 4859 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4860 NewCallSeqStart.getNode()); 4861 Chain = CallSeqStart = NewCallSeqStart; 4862 4863 // Pass the address of the aggregate copy on the stack either in a 4864 // physical register or in the parameter list area of the current stack 4865 // frame to the callee. 4866 Arg = PtrOff; 4867 } 4868 4869 if (VA.isRegLoc()) { 4870 if (Arg.getValueType() == MVT::i1) 4871 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 4872 4873 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 4874 // Put argument in a physical register. 4875 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 4876 } else { 4877 // Put argument in the parameter list area of the current stack frame. 4878 assert(VA.isMemLoc()); 4879 unsigned LocMemOffset = VA.getLocMemOffset(); 4880 4881 if (!isTailCall) { 4882 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4883 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4884 StackPtr, PtrOff); 4885 4886 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 4887 MachinePointerInfo(), 4888 false, false, 0)); 4889 } else { 4890 // Calculate and remember argument location. 
4891 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 4892 TailCallArguments); 4893 } 4894 } 4895 } 4896 4897 if (!MemOpChains.empty()) 4898 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4899 4900 // Build a sequence of copy-to-reg nodes chained together with token chain 4901 // and flag operands which copy the outgoing args into the appropriate regs. 4902 SDValue InFlag; 4903 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4904 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4905 RegsToPass[i].second, InFlag); 4906 InFlag = Chain.getValue(1); 4907 } 4908 4909 // Set CR bit 6 to true if this is a vararg call with floating args passed in 4910 // registers. 4911 if (isVarArg) { 4912 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 4913 SDValue Ops[] = { Chain, InFlag }; 4914 4915 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 4916 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); 4917 4918 InFlag = Chain.getValue(1); 4919 } 4920 4921 if (isTailCall) 4922 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, 4923 false, TailCallArguments); 4924 4925 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, 4926 /* unused except on PPC64 ELFv1 */ false, DAG, 4927 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 4928 NumBytes, Ins, InVals, CS); 4929 } 4930 4931 // Copy an argument into memory, being careful to do this outside the 4932 // call sequence for the call to which the argument belongs. 4933 SDValue 4934 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, 4935 SDValue CallSeqStart, 4936 ISD::ArgFlagsTy Flags, 4937 SelectionDAG &DAG, 4938 SDLoc dl) const { 4939 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 4940 CallSeqStart.getNode()->getOperand(0), 4941 Flags, DAG, dl); 4942 // The MEMCPY must go outside the CALLSEQ_START..END. 4943 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4944 CallSeqStart.getNode()->getOperand(1), 4945 SDLoc(MemcpyCall)); 4946 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4947 NewCallSeqStart.getNode()); 4948 return NewCallSeqStart; 4949 } 4950 4951 SDValue 4952 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, 4953 CallingConv::ID CallConv, bool isVarArg, 4954 bool isTailCall, bool IsPatchPoint, 4955 const SmallVectorImpl<ISD::OutputArg> &Outs, 4956 const SmallVectorImpl<SDValue> &OutVals, 4957 const SmallVectorImpl<ISD::InputArg> &Ins, 4958 SDLoc dl, SelectionDAG &DAG, 4959 SmallVectorImpl<SDValue> &InVals, 4960 ImmutableCallSite *CS) const { 4961 4962 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4963 bool isLittleEndian = Subtarget.isLittleEndian(); 4964 unsigned NumOps = Outs.size(); 4965 bool hasNest = false; 4966 bool IsSibCall = false; 4967 4968 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4969 unsigned PtrByteSize = 8; 4970 4971 MachineFunction &MF = DAG.getMachineFunction(); 4972 4973 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) 4974 IsSibCall = true; 4975 4976 // Mark this function as potentially containing a function that contains a 4977 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4978 // and restoring the callers stack pointer in this functions epilog. This is 4979 // done because by tail calling the called function might overwrite the value 4980 // in this function's (MF) stack pointer stack slot 0(SP). 
4981 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4982 CallConv == CallingConv::Fast) 4983 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4984 4985 assert(!(CallConv == CallingConv::Fast && isVarArg) && 4986 "fastcc not supported on varargs functions"); 4987 4988 // Count how many bytes are to be pushed on the stack, including the linkage 4989 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 4990 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 4991 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 4992 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 4993 unsigned NumBytes = LinkageSize; 4994 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4995 unsigned &QFPR_idx = FPR_idx; 4996 4997 static const MCPhysReg GPR[] = { 4998 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4999 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5000 }; 5001 static const MCPhysReg VR[] = { 5002 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5003 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5004 }; 5005 static const MCPhysReg VSRH[] = { 5006 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 5007 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 5008 }; 5009 5010 const unsigned NumGPRs = array_lengthof(GPR); 5011 const unsigned NumFPRs = 13; 5012 const unsigned NumVRs = array_lengthof(VR); 5013 const unsigned NumQFPRs = NumFPRs; 5014 5015 // When using the fast calling convention, we don't provide backing for 5016 // arguments that will be in registers. 5017 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; 5018 5019 // Add up all the space actually used. 5020 for (unsigned i = 0; i != NumOps; ++i) { 5021 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5022 EVT ArgVT = Outs[i].VT; 5023 EVT OrigVT = Outs[i].ArgVT; 5024 5025 if (Flags.isNest()) 5026 continue; 5027 5028 if (CallConv == CallingConv::Fast) { 5029 if (Flags.isByVal()) 5030 NumGPRsUsed += (Flags.getByValSize()+7)/8; 5031 else 5032 switch (ArgVT.getSimpleVT().SimpleTy) { 5033 default: llvm_unreachable("Unexpected ValueType for argument!"); 5034 case MVT::i1: 5035 case MVT::i32: 5036 case MVT::i64: 5037 if (++NumGPRsUsed <= NumGPRs) 5038 continue; 5039 break; 5040 case MVT::v4i32: 5041 case MVT::v8i16: 5042 case MVT::v16i8: 5043 case MVT::v2f64: 5044 case MVT::v2i64: 5045 case MVT::v1i128: 5046 if (++NumVRsUsed <= NumVRs) 5047 continue; 5048 break; 5049 case MVT::v4f32: 5050 // When using QPX, this is handled like a FP register, otherwise, it 5051 // is an Altivec register. 5052 if (Subtarget.hasQPX()) { 5053 if (++NumFPRsUsed <= NumFPRs) 5054 continue; 5055 } else { 5056 if (++NumVRsUsed <= NumVRs) 5057 continue; 5058 } 5059 break; 5060 case MVT::f32: 5061 case MVT::f64: 5062 case MVT::v4f64: // QPX 5063 case MVT::v4i1: // QPX 5064 if (++NumFPRsUsed <= NumFPRs) 5065 continue; 5066 break; 5067 } 5068 } 5069 5070 /* Respect alignment of argument on the stack. */ 5071 unsigned Align = 5072 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5073 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 5074 5075 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5076 if (Flags.isInConsecutiveRegsLast()) 5077 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5078 } 5079 5080 unsigned NumBytesActuallyUsed = NumBytes; 5081 5082 // The prolog code of the callee may store up to 8 GPR argument registers to 5083 // the stack, allowing va_start to index over them in memory if its varargs. 
5084 // Because we cannot tell if this is needed on the caller side, we have to
5085 // conservatively assume that it is needed. As such, make sure we have at
5086 // least enough stack space for the caller to store the 8 GPRs.
5087 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
5088 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5089
5090 // Tail call needs the stack to be aligned.
5091 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5092 CallConv == CallingConv::Fast)
5093 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5094
5095 int SPDiff = 0;
5096
5097 // Calculate by how many bytes the stack has to be adjusted in case of tail
5098 // call optimization.
5099 if (!IsSibCall)
5100 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5101
5102 // To protect arguments on the stack from being clobbered in a tail call,
5103 // force all the loads to happen before doing any other lowering.
5104 if (isTailCall)
5105 Chain = DAG.getStackArgumentTokenFactor(Chain);
5106
5107 // Adjust the stack pointer for the new arguments...
5108 // These operations are automatically eliminated by the prolog/epilog pass
5109 if (!IsSibCall)
5110 Chain = DAG.getCALLSEQ_START(Chain,
5111 DAG.getIntPtrConstant(NumBytes, dl, true), dl);
5112 SDValue CallSeqStart = Chain;
5113
5114 // Load the return address and frame pointer so it can be moved somewhere else
5115 // later.
5116 SDValue LROp, FPOp;
5117 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
5118 dl);
5119
5120 // Set up a copy of the stack pointer for use loading and storing any
5121 // arguments that may not fit in the registers available for argument
5122 // passing.
5123 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5124
5125 // Figure out which arguments are going to go in registers, and which in
5126 // memory. Also, if this is a vararg function, floating point operations
5127 // must be stored to our stack, and loaded into integer regs as well, if
5128 // any integer regs are available for argument passing.
5129 unsigned ArgOffset = LinkageSize;
5130
5131 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5132 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5133
5134 SmallVector<SDValue, 8> MemOpChains;
5135 for (unsigned i = 0; i != NumOps; ++i) {
5136 SDValue Arg = OutVals[i];
5137 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5138 EVT ArgVT = Outs[i].VT;
5139 EVT OrigVT = Outs[i].ArgVT;
5140
5141 // PtrOff will be used to store the current argument to the stack if a
5142 // register cannot be found for it.
5143 SDValue PtrOff;
5144
5145 // We re-align the argument offset for each argument, except when using the
5146 // fast calling convention, when we need to make sure we do that only when
5147 // we'll actually use a stack slot.
5148 auto ComputePtrOff = [&]() {
5149 /* Respect alignment of argument on the stack. */
5150 unsigned Align =
5151 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5152 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5153
5154 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5155
5156 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5157 };
5158
5159 if (CallConv != CallingConv::Fast) {
5160 ComputePtrOff();
5161
5162 /* Compute GPR index associated with argument offset. */
5163 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5164 GPR_idx = std::min(GPR_idx, NumGPRs);
5165 }
5166
5167 // Promote integers to 64-bit values.
5168 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 5169 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5170 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5171 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5172 } 5173 5174 // FIXME memcpy is used way more than necessary. Correctness first. 5175 // Note: "by value" is code for passing a structure by value, not 5176 // basic types. 5177 if (Flags.isByVal()) { 5178 // Note: Size includes alignment padding, so 5179 // struct x { short a; char b; } 5180 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 5181 // These are the proper values we need for right-justifying the 5182 // aggregate in a parameter register. 5183 unsigned Size = Flags.getByValSize(); 5184 5185 // An empty aggregate parameter takes up no storage and no 5186 // registers. 5187 if (Size == 0) 5188 continue; 5189 5190 if (CallConv == CallingConv::Fast) 5191 ComputePtrOff(); 5192 5193 // All aggregates smaller than 8 bytes must be passed right-justified. 5194 if (Size==1 || Size==2 || Size==4) { 5195 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); 5196 if (GPR_idx != NumGPRs) { 5197 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5198 MachinePointerInfo(), VT, 5199 false, false, false, 0); 5200 MemOpChains.push_back(Load.getValue(1)); 5201 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5202 5203 ArgOffset += PtrByteSize; 5204 continue; 5205 } 5206 } 5207 5208 if (GPR_idx == NumGPRs && Size < 8) { 5209 SDValue AddPtr = PtrOff; 5210 if (!isLittleEndian) { 5211 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5212 PtrOff.getValueType()); 5213 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5214 } 5215 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5216 CallSeqStart, 5217 Flags, DAG, dl); 5218 ArgOffset += PtrByteSize; 5219 continue; 5220 } 5221 // Copy entire object into memory. There are cases where gcc-generated 5222 // code assumes it is there, even if it could be put entirely into 5223 // registers. (This is not what the doc says.) 5224 5225 // FIXME: The above statement is likely due to a misunderstanding of the 5226 // documents. All arguments must be copied into the parameter area BY 5227 // THE CALLEE in the event that the callee takes the address of any 5228 // formal argument. That has not yet been implemented. However, it is 5229 // reasonable to use the stack area as a staging area for the register 5230 // load. 5231 5232 // Skip this for small aggregates, as we will use the same slot for a 5233 // right-justified copy, below. 5234 if (Size >= 8) 5235 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5236 CallSeqStart, 5237 Flags, DAG, dl); 5238 5239 // When a register is available, pass a small aggregate right-justified. 5240 if (Size < 8 && GPR_idx != NumGPRs) { 5241 // The easiest way to get this right-justified in a register 5242 // is to copy the structure into the rightmost portion of a 5243 // local variable slot, then load the whole slot into the 5244 // register. 5245 // FIXME: The memcpy seems to produce pretty awful code for 5246 // small aggregates, particularly for packed ones. 5247 // FIXME: It would be preferable to use the slot in the 5248 // parameter save area instead of a new local variable. 
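// For example, on a big-endian target a 3-byte aggregate is memcpy'd to
// PtrOff + (8 - 3) = PtrOff + 5, i.e. into the rightmost bytes of the
// doubleword slot, so the full-doubleword load from PtrOff below yields the
// value right-justified in the low-order bytes of the GPR.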
5249 SDValue AddPtr = PtrOff; 5250 if (!isLittleEndian) { 5251 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); 5252 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5253 } 5254 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5255 CallSeqStart, 5256 Flags, DAG, dl); 5257 5258 // Load the slot into the register. 5259 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, 5260 MachinePointerInfo(), 5261 false, false, false, 0); 5262 MemOpChains.push_back(Load.getValue(1)); 5263 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5264 5265 // Done with this argument. 5266 ArgOffset += PtrByteSize; 5267 continue; 5268 } 5269 5270 // For aggregates larger than PtrByteSize, copy the pieces of the 5271 // object that fit into registers from the parameter save area. 5272 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5273 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5274 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5275 if (GPR_idx != NumGPRs) { 5276 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 5277 MachinePointerInfo(), 5278 false, false, false, 0); 5279 MemOpChains.push_back(Load.getValue(1)); 5280 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5281 ArgOffset += PtrByteSize; 5282 } else { 5283 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5284 break; 5285 } 5286 } 5287 continue; 5288 } 5289 5290 switch (Arg.getSimpleValueType().SimpleTy) { 5291 default: llvm_unreachable("Unexpected ValueType for argument!"); 5292 case MVT::i1: 5293 case MVT::i32: 5294 case MVT::i64: 5295 if (Flags.isNest()) { 5296 // The 'nest' parameter, if any, is passed in R11. 5297 RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); 5298 hasNest = true; 5299 break; 5300 } 5301 5302 // These can be scalar arguments or elements of an integer array type 5303 // passed directly. Clang may use those instead of "byval" aggregate 5304 // types to avoid forcing arguments to memory unnecessarily. 5305 if (GPR_idx != NumGPRs) { 5306 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5307 } else { 5308 if (CallConv == CallingConv::Fast) 5309 ComputePtrOff(); 5310 5311 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5312 true, isTailCall, false, MemOpChains, 5313 TailCallArguments, dl); 5314 if (CallConv == CallingConv::Fast) 5315 ArgOffset += PtrByteSize; 5316 } 5317 if (CallConv != CallingConv::Fast) 5318 ArgOffset += PtrByteSize; 5319 break; 5320 case MVT::f32: 5321 case MVT::f64: { 5322 // These can be scalar arguments or elements of a float array type 5323 // passed directly. The latter are used to implement ELFv2 homogenous 5324 // float aggregates. 5325 5326 // Named arguments go into FPRs first, and once they overflow, the 5327 // remaining arguments go into GPRs and then the parameter save area. 5328 // Unnamed arguments for vararg functions always go to GPRs and 5329 // then the parameter save area. For now, put all arguments to vararg 5330 // routines always in both locations (FPR *and* GPR or stack slot). 5331 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 5332 bool NeededLoad = false; 5333 5334 // First load the argument into the next available FPR. 5335 if (FPR_idx != NumFPRs) 5336 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5337 5338 // Next, load the argument into GPR or stack slot if needed. 
5339 if (!NeedGPROrStack)
5340 ;
5341 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5342 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5343 // once we support fp <-> gpr moves.
5344
5345 // In the non-vararg case, this can only ever happen in the
5346 // presence of f32 array types, since otherwise we never run
5347 // out of FPRs before running out of GPRs.
5348 SDValue ArgVal;
5349
5350 // Double values are always passed in a single GPR.
5351 if (Arg.getValueType() != MVT::f32) {
5352 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5353
5354 // Non-array float values are extended and passed in a GPR.
5355 } else if (!Flags.isInConsecutiveRegs()) {
5356 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5357 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5358
5359 // If we have an array of floats, we collect every odd element
5360 // together with its predecessor into one GPR.
5361 } else if (ArgOffset % PtrByteSize != 0) {
5362 SDValue Lo, Hi;
5363 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5364 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5365 if (!isLittleEndian)
5366 std::swap(Lo, Hi);
5367 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5368
5369 // The final element, if even, goes into the first half of a GPR.
5370 } else if (Flags.isInConsecutiveRegsLast()) {
5371 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5372 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5373 if (!isLittleEndian)
5374 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5375 DAG.getConstant(32, dl, MVT::i32));
5376
5377 // Non-final even elements are skipped; they will be handled
5378 // together with the subsequent argument on the next go-around.
5379 } else
5380 ArgVal = SDValue();
5381
5382 if (ArgVal.getNode())
5383 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5384 } else {
5385 if (CallConv == CallingConv::Fast)
5386 ComputePtrOff();
5387
5388 // Single-precision floating-point values are mapped to the
5389 // second (rightmost) word of the stack doubleword.
5390 if (Arg.getValueType() == MVT::f32 &&
5391 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5392 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5393 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5394 }
5395
5396 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5397 true, isTailCall, false, MemOpChains,
5398 TailCallArguments, dl);
5399
5400 NeededLoad = true;
5401 }
5402 // When passing an array of floats, the array occupies consecutive
5403 // space in the argument area; only round up to the next doubleword
5404 // at the end of the array. Otherwise, each float takes 8 bytes.
5405 if (CallConv != CallingConv::Fast || NeededLoad) {
5406 ArgOffset += (Arg.getValueType() == MVT::f32 &&
5407 Flags.isInConsecutiveRegs()) ? 4 : 8;
5408 if (Flags.isInConsecutiveRegsLast())
5409 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5410 }
5411 break;
5412 }
5413 case MVT::v4f32:
5414 case MVT::v4i32:
5415 case MVT::v8i16:
5416 case MVT::v16i8:
5417 case MVT::v2f64:
5418 case MVT::v2i64:
5419 case MVT::v1i128:
5420 if (!Subtarget.hasQPX()) {
5421 // These can be scalar arguments or elements of a vector array type
5422 // passed directly. The latter are used to implement ELFv2 homogeneous
5423 // vector aggregates.
5424 5425 // For a varargs call, named arguments go into VRs or on the stack as 5426 // usual; unnamed arguments always go to the stack or the corresponding 5427 // GPRs when within range. For now, we always put the value in both 5428 // locations (or even all three). 5429 if (isVarArg) { 5430 // We could elide this store in the case where the object fits 5431 // entirely in R registers. Maybe later. 5432 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 5433 MachinePointerInfo(), false, false, 0); 5434 MemOpChains.push_back(Store); 5435 if (VR_idx != NumVRs) { 5436 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 5437 MachinePointerInfo(), 5438 false, false, false, 0); 5439 MemOpChains.push_back(Load.getValue(1)); 5440 5441 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 5442 Arg.getSimpleValueType() == MVT::v2i64) ? 5443 VSRH[VR_idx] : VR[VR_idx]; 5444 ++VR_idx; 5445 5446 RegsToPass.push_back(std::make_pair(VReg, Load)); 5447 } 5448 ArgOffset += 16; 5449 for (unsigned i=0; i<16; i+=PtrByteSize) { 5450 if (GPR_idx == NumGPRs) 5451 break; 5452 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5453 DAG.getConstant(i, dl, PtrVT)); 5454 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 5455 false, false, false, 0); 5456 MemOpChains.push_back(Load.getValue(1)); 5457 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5458 } 5459 break; 5460 } 5461 5462 // Non-varargs Altivec params go into VRs or on the stack. 5463 if (VR_idx != NumVRs) { 5464 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 5465 Arg.getSimpleValueType() == MVT::v2i64) ? 5466 VSRH[VR_idx] : VR[VR_idx]; 5467 ++VR_idx; 5468 5469 RegsToPass.push_back(std::make_pair(VReg, Arg)); 5470 } else { 5471 if (CallConv == CallingConv::Fast) 5472 ComputePtrOff(); 5473 5474 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5475 true, isTailCall, true, MemOpChains, 5476 TailCallArguments, dl); 5477 if (CallConv == CallingConv::Fast) 5478 ArgOffset += 16; 5479 } 5480 5481 if (CallConv != CallingConv::Fast) 5482 ArgOffset += 16; 5483 break; 5484 } // not QPX 5485 5486 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && 5487 "Invalid QPX parameter type"); 5488 5489 /* fall through */ 5490 case MVT::v4f64: 5491 case MVT::v4i1: { 5492 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; 5493 if (isVarArg) { 5494 // We could elide this store in the case where the object fits 5495 // entirely in R registers. Maybe later. 5496 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 5497 MachinePointerInfo(), false, false, 0); 5498 MemOpChains.push_back(Store); 5499 if (QFPR_idx != NumQFPRs) { 5500 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, 5501 Store, PtrOff, MachinePointerInfo(), 5502 false, false, false, 0); 5503 MemOpChains.push_back(Load.getValue(1)); 5504 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); 5505 } 5506 ArgOffset += (IsF32 ? 16 : 32); 5507 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) { 5508 if (GPR_idx == NumGPRs) 5509 break; 5510 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5511 DAG.getConstant(i, dl, PtrVT)); 5512 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 5513 false, false, false, 0); 5514 MemOpChains.push_back(Load.getValue(1)); 5515 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5516 } 5517 break; 5518 } 5519 5520 // Non-varargs QPX params go into registers or on the stack. 
5521 if (QFPR_idx != NumQFPRs) { 5522 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); 5523 } else { 5524 if (CallConv == CallingConv::Fast) 5525 ComputePtrOff(); 5526 5527 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5528 true, isTailCall, true, MemOpChains, 5529 TailCallArguments, dl); 5530 if (CallConv == CallingConv::Fast) 5531 ArgOffset += (IsF32 ? 16 : 32); 5532 } 5533 5534 if (CallConv != CallingConv::Fast) 5535 ArgOffset += (IsF32 ? 16 : 32); 5536 break; 5537 } 5538 } 5539 } 5540 5541 assert(NumBytesActuallyUsed == ArgOffset); 5542 (void)NumBytesActuallyUsed; 5543 5544 if (!MemOpChains.empty()) 5545 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5546 5547 // Check if this is an indirect call (MTCTR/BCTRL). 5548 // See PrepareCall() for more information about calls through function 5549 // pointers in the 64-bit SVR4 ABI. 5550 if (!isTailCall && !IsPatchPoint && 5551 !isFunctionGlobalAddress(Callee) && 5552 !isa<ExternalSymbolSDNode>(Callee)) { 5553 // Load r2 into a virtual register and store it to the TOC save area. 5554 setUsesTOCBasePtr(DAG); 5555 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 5556 // TOC save area offset. 5557 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 5558 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 5559 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5560 Chain = DAG.getStore( 5561 Val.getValue(1), dl, Val, AddPtr, 5562 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset), 5563 false, false, 0); 5564 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 5565 // This does not mean the MTCTR instruction must use R12; it's easier 5566 // to model this as an extra parameter, so do that. 5567 if (isELFv2ABI && !IsPatchPoint) 5568 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 5569 } 5570 5571 // Build a sequence of copy-to-reg nodes chained together with token chain 5572 // and flag operands which copy the outgoing args into the appropriate regs. 5573 SDValue InFlag; 5574 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5575 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5576 RegsToPass[i].second, InFlag); 5577 InFlag = Chain.getValue(1); 5578 } 5579 5580 if (isTailCall && !IsSibCall) 5581 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, 5582 FPOp, true, TailCallArguments); 5583 5584 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, hasNest, 5585 DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, 5586 SPDiff, NumBytes, Ins, InVals, CS); 5587 } 5588 5589 SDValue 5590 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, 5591 CallingConv::ID CallConv, bool isVarArg, 5592 bool isTailCall, bool IsPatchPoint, 5593 const SmallVectorImpl<ISD::OutputArg> &Outs, 5594 const SmallVectorImpl<SDValue> &OutVals, 5595 const SmallVectorImpl<ISD::InputArg> &Ins, 5596 SDLoc dl, SelectionDAG &DAG, 5597 SmallVectorImpl<SDValue> &InVals, 5598 ImmutableCallSite *CS) const { 5599 5600 unsigned NumOps = Outs.size(); 5601 5602 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 5603 bool isPPC64 = PtrVT == MVT::i64; 5604 unsigned PtrByteSize = isPPC64 ? 8 : 4; 5605 5606 MachineFunction &MF = DAG.getMachineFunction(); 5607 5608 // Mark this function as potentially containing a function that contains a 5609 // tail call. 
As a consequence, the frame pointer will be used for dynamic stack
5610 // allocation and for restoring the caller's stack pointer in this function's
5611 // epilogue. This is done because the tail-called function might overwrite the
5612 // value in this function's (MF) stack pointer stack slot 0(SP).
5613 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5614 CallConv == CallingConv::Fast)
5615 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5616
5617 // Count how many bytes are to be pushed on the stack, including the linkage
5618 // area, and parameter passing area. We start with 24/48 bytes, which is
5619 // prereserved space for [SP][CR][LR][3 x unused].
5620 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5621 unsigned NumBytes = LinkageSize;
5622
5623 // Add up all the space actually used.
5624 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
5625 // they all go in registers, but we must reserve stack space for them for
5626 // possible use by the caller. In varargs or 64-bit calls, parameters are
5627 // assigned stack space in order, with padding so Altivec parameters are
5628 // 16-byte aligned.
5629 unsigned nAltivecParamsAtEnd = 0;
5630 for (unsigned i = 0; i != NumOps; ++i) {
5631 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5632 EVT ArgVT = Outs[i].VT;
5633 // Varargs Altivec parameters are padded to a 16 byte boundary.
5634 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
5635 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
5636 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
5637 if (!isVarArg && !isPPC64) {
5638 // Non-varargs Altivec parameters go after all the non-Altivec
5639 // parameters; handle those later so we know how much padding we need.
5640 nAltivecParamsAtEnd++;
5641 continue;
5642 }
5643 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
5644 NumBytes = ((NumBytes+15)/16)*16;
5645 }
5646 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5647 }
5648
5649 // Allow for Altivec parameters at the end, if needed.
5650 if (nAltivecParamsAtEnd) {
5651 NumBytes = ((NumBytes+15)/16)*16;
5652 NumBytes += 16*nAltivecParamsAtEnd;
5653 }
5654
5655 // The prolog code of the callee may store up to 8 GPR argument registers to
5656 // the stack, allowing va_start to index over them in memory if it is varargs.
5657 // Because we cannot tell if this is needed on the caller side, we have to
5658 // conservatively assume that it is needed. As such, make sure we have at
5659 // least enough stack space for the caller to store the 8 GPRs.
5660 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5661
5662 // Tail call needs the stack to be aligned.
5663 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5664 CallConv == CallingConv::Fast)
5665 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5666
5667 // Calculate by how many bytes the stack has to be adjusted in case of tail
5668 // call optimization.
5669 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5670
5671 // To protect arguments on the stack from being clobbered in a tail call,
5672 // force all the loads to happen before doing any other lowering.
5673 if (isTailCall)
5674 Chain = DAG.getStackArgumentTokenFactor(Chain);
5675
5676 // Adjust the stack pointer for the new arguments...
5677 // These operations are automatically eliminated by the prolog/epilog pass
5678 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5679 dl);
5680 SDValue CallSeqStart = Chain;
5681
5682 // Load the return address and frame pointer so it can be moved somewhere else
5683 // later.
5684 SDValue LROp, FPOp;
5685 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
5686 dl);
5687
5688 // Set up a copy of the stack pointer for use loading and storing any
5689 // arguments that may not fit in the registers available for argument
5690 // passing.
5691 SDValue StackPtr;
5692 if (isPPC64)
5693 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5694 else
5695 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5696
5697 // Figure out which arguments are going to go in registers, and which in
5698 // memory. Also, if this is a vararg function, floating point operations
5699 // must be stored to our stack, and loaded into integer regs as well, if
5700 // any integer regs are available for argument passing.
5701 unsigned ArgOffset = LinkageSize;
5702 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5703
5704 static const MCPhysReg GPR_32[] = { // 32-bit registers.
5705 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
5706 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
5707 };
5708 static const MCPhysReg GPR_64[] = { // 64-bit registers.
5709 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5710 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5711 };
5712 static const MCPhysReg VR[] = {
5713 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5714 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5715 };
5716 const unsigned NumGPRs = array_lengthof(GPR_32);
5717 const unsigned NumFPRs = 13;
5718 const unsigned NumVRs = array_lengthof(VR);
5719
5720 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
5721
5722 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5723 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5724
5725 SmallVector<SDValue, 8> MemOpChains;
5726 for (unsigned i = 0; i != NumOps; ++i) {
5727 SDValue Arg = OutVals[i];
5728 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5729
5730 // PtrOff will be used to store the current argument to the stack if a
5731 // register cannot be found for it.
5732 SDValue PtrOff;
5733
5734 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5735
5736 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5737
5738 // On PPC64, promote integers to 64-bit values.
5739 if (isPPC64 && Arg.getValueType() == MVT::i32) {
5740 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5741 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5742 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5743 }
5744
5745 // FIXME memcpy is used way more than necessary. Correctness first.
5746 // Note: "by value" is code for passing a structure by value, not
5747 // basic types.
5748 if (Flags.isByVal()) {
5749 unsigned Size = Flags.getByValSize();
5750 // Very small objects are passed right-justified. Everything else is
5751 // passed left-justified.
5752 if (Size==1 || Size==2) {
5753 EVT VT = (Size==1) ?
MVT::i8 : MVT::i16; 5754 if (GPR_idx != NumGPRs) { 5755 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5756 MachinePointerInfo(), VT, 5757 false, false, false, 0); 5758 MemOpChains.push_back(Load.getValue(1)); 5759 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5760 5761 ArgOffset += PtrByteSize; 5762 } else { 5763 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5764 PtrOff.getValueType()); 5765 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5766 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5767 CallSeqStart, 5768 Flags, DAG, dl); 5769 ArgOffset += PtrByteSize; 5770 } 5771 continue; 5772 } 5773 // Copy entire object into memory. There are cases where gcc-generated 5774 // code assumes it is there, even if it could be put entirely into 5775 // registers. (This is not what the doc says.) 5776 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5777 CallSeqStart, 5778 Flags, DAG, dl); 5779 5780 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 5781 // copy the pieces of the object that fit into registers from the 5782 // parameter save area. 5783 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5784 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5785 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5786 if (GPR_idx != NumGPRs) { 5787 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 5788 MachinePointerInfo(), 5789 false, false, false, 0); 5790 MemOpChains.push_back(Load.getValue(1)); 5791 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5792 ArgOffset += PtrByteSize; 5793 } else { 5794 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5795 break; 5796 } 5797 } 5798 continue; 5799 } 5800 5801 switch (Arg.getSimpleValueType().SimpleTy) { 5802 default: llvm_unreachable("Unexpected ValueType for argument!"); 5803 case MVT::i1: 5804 case MVT::i32: 5805 case MVT::i64: 5806 if (GPR_idx != NumGPRs) { 5807 if (Arg.getValueType() == MVT::i1) 5808 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 5809 5810 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5811 } else { 5812 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5813 isPPC64, isTailCall, false, MemOpChains, 5814 TailCallArguments, dl); 5815 } 5816 ArgOffset += PtrByteSize; 5817 break; 5818 case MVT::f32: 5819 case MVT::f64: 5820 if (FPR_idx != NumFPRs) { 5821 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5822 5823 if (isVarArg) { 5824 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 5825 MachinePointerInfo(), false, false, 0); 5826 MemOpChains.push_back(Store); 5827 5828 // Float varargs are always shadowed in available integer registers 5829 if (GPR_idx != NumGPRs) { 5830 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 5831 MachinePointerInfo(), false, false, 5832 false, 0); 5833 MemOpChains.push_back(Load.getValue(1)); 5834 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5835 } 5836 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 5837 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 5838 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5839 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 5840 MachinePointerInfo(), 5841 false, false, false, 0); 5842 MemOpChains.push_back(Load.getValue(1)); 5843 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5844 } 5845 } else { 5846 // If we have any FPRs remaining, we may also have GPRs remaining. 
5847 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 5848 // GPRs. 5849 if (GPR_idx != NumGPRs) 5850 ++GPR_idx; 5851 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 5852 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 5853 ++GPR_idx; 5854 } 5855 } else 5856 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5857 isPPC64, isTailCall, false, MemOpChains, 5858 TailCallArguments, dl); 5859 if (isPPC64) 5860 ArgOffset += 8; 5861 else 5862 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 5863 break; 5864 case MVT::v4f32: 5865 case MVT::v4i32: 5866 case MVT::v8i16: 5867 case MVT::v16i8: 5868 if (isVarArg) { 5869 // These go aligned on the stack, or in the corresponding R registers 5870 // when within range. The Darwin PPC ABI doc claims they also go in 5871 // V registers; in fact gcc does this only for arguments that are 5872 // prototyped, not for those that match the ... We do it for all 5873 // arguments, seems to work. 5874 while (ArgOffset % 16 !=0) { 5875 ArgOffset += PtrByteSize; 5876 if (GPR_idx != NumGPRs) 5877 GPR_idx++; 5878 } 5879 // We could elide this store in the case where the object fits 5880 // entirely in R registers. Maybe later. 5881 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 5882 DAG.getConstant(ArgOffset, dl, PtrVT)); 5883 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 5884 MachinePointerInfo(), false, false, 0); 5885 MemOpChains.push_back(Store); 5886 if (VR_idx != NumVRs) { 5887 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 5888 MachinePointerInfo(), 5889 false, false, false, 0); 5890 MemOpChains.push_back(Load.getValue(1)); 5891 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5892 } 5893 ArgOffset += 16; 5894 for (unsigned i=0; i<16; i+=PtrByteSize) { 5895 if (GPR_idx == NumGPRs) 5896 break; 5897 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5898 DAG.getConstant(i, dl, PtrVT)); 5899 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 5900 false, false, false, 0); 5901 MemOpChains.push_back(Load.getValue(1)); 5902 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5903 } 5904 break; 5905 } 5906 5907 // Non-varargs Altivec params generally go in registers, but have 5908 // stack space allocated at the end. 5909 if (VR_idx != NumVRs) { 5910 // Doesn't have GPR space allocated. 5911 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5912 } else if (nAltivecParamsAtEnd==0) { 5913 // We are emitting Altivec params in order. 5914 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5915 isPPC64, isTailCall, true, MemOpChains, 5916 TailCallArguments, dl); 5917 ArgOffset += 16; 5918 } 5919 break; 5920 } 5921 } 5922 // If all Altivec parameters fit in registers, as they usually do, 5923 // they get stack space following the non-Altivec parameters. We 5924 // don't track this here because nobody below needs it. 5925 // If there are more Altivec parameters than fit in registers emit 5926 // the stores here. 5927 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 5928 unsigned j = 0; 5929 // Offset is aligned; skip 1st 12 params which go in V registers. 5930 ArgOffset = ((ArgOffset+15)/16)*16; 5931 ArgOffset += 12*16; 5932 for (unsigned i = 0; i != NumOps; ++i) { 5933 SDValue Arg = OutVals[i]; 5934 EVT ArgType = Outs[i].VT; 5935 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 5936 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 5937 if (++j > NumVRs) { 5938 SDValue PtrOff; 5939 // We are emitting Altivec params in order. 
5940 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5941 isPPC64, isTailCall, true, MemOpChains, 5942 TailCallArguments, dl); 5943 ArgOffset += 16; 5944 } 5945 } 5946 } 5947 } 5948 5949 if (!MemOpChains.empty()) 5950 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5951 5952 // On Darwin, R12 must contain the address of an indirect callee. This does 5953 // not mean the MTCTR instruction must use R12; it's easier to model this as 5954 // an extra parameter, so do that. 5955 if (!isTailCall && 5956 !isFunctionGlobalAddress(Callee) && 5957 !isa<ExternalSymbolSDNode>(Callee) && 5958 !isBLACompatibleAddress(Callee, DAG)) 5959 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 5960 PPC::R12), Callee)); 5961 5962 // Build a sequence of copy-to-reg nodes chained together with token chain 5963 // and flag operands which copy the outgoing args into the appropriate regs. 5964 SDValue InFlag; 5965 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5966 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5967 RegsToPass[i].second, InFlag); 5968 InFlag = Chain.getValue(1); 5969 } 5970 5971 if (isTailCall) 5972 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 5973 FPOp, true, TailCallArguments); 5974 5975 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, 5976 /* unused except on PPC64 ELFv1 */ false, DAG, 5977 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 5978 NumBytes, Ins, InVals, CS); 5979 } 5980 5981 bool 5982 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 5983 MachineFunction &MF, bool isVarArg, 5984 const SmallVectorImpl<ISD::OutputArg> &Outs, 5985 LLVMContext &Context) const { 5986 SmallVector<CCValAssign, 16> RVLocs; 5987 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 5988 return CCInfo.CheckReturn(Outs, RetCC_PPC); 5989 } 5990 5991 SDValue 5992 PPCTargetLowering::LowerReturn(SDValue Chain, 5993 CallingConv::ID CallConv, bool isVarArg, 5994 const SmallVectorImpl<ISD::OutputArg> &Outs, 5995 const SmallVectorImpl<SDValue> &OutVals, 5996 SDLoc dl, SelectionDAG &DAG) const { 5997 5998 SmallVector<CCValAssign, 16> RVLocs; 5999 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 6000 *DAG.getContext()); 6001 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 6002 6003 SDValue Flag; 6004 SmallVector<SDValue, 4> RetOps(1, Chain); 6005 6006 // Copy the result values into the output registers. 
6007 for (unsigned i = 0; i != RVLocs.size(); ++i) {
6008 CCValAssign &VA = RVLocs[i];
6009 assert(VA.isRegLoc() && "Can only return in registers!");
6010
6011 SDValue Arg = OutVals[i];
6012
6013 switch (VA.getLocInfo()) {
6014 default: llvm_unreachable("Unknown loc info!");
6015 case CCValAssign::Full: break;
6016 case CCValAssign::AExt:
6017 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6018 break;
6019 case CCValAssign::ZExt:
6020 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6021 break;
6022 case CCValAssign::SExt:
6023 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6024 break;
6025 }
6026
6027 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6028 Flag = Chain.getValue(1);
6029 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6030 }
6031
6032 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6033 const MCPhysReg *I =
6034 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6035 if (I) {
6036 for (; *I; ++I) {
6037
6038 if (PPC::G8RCRegClass.contains(*I))
6039 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6040 else if (PPC::F8RCRegClass.contains(*I))
6041 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6042 else if (PPC::CRRCRegClass.contains(*I))
6043 RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6044 else if (PPC::VRRCRegClass.contains(*I))
6045 RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6046 else
6047 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6048 }
6049 }
6050
6051 RetOps[0] = Chain; // Update chain.
6052
6053 // Add the flag if we have it.
6054 if (Flag.getNode())
6055 RetOps.push_back(Flag);
6056
6057 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6058 }
6059
6060 SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(
6061 SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const {
6062 SDLoc dl(Op);
6063
6064 // Get the correct type for integers.
6065 EVT IntVT = Op.getValueType();
6066
6067 // Get the inputs.
6068 SDValue Chain = Op.getOperand(0);
6069 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6070 // Build a DYNAREAOFFSET node.
6071 SDValue Ops[2] = {Chain, FPSIdx};
6072 SDVTList VTs = DAG.getVTList(IntVT);
6073 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6074 }
6075
6076 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
6077 const PPCSubtarget &Subtarget) const {
6078 // When we pop the dynamic allocation we need to restore the SP link.
6079 SDLoc dl(Op);
6080
6081 // Get the correct type for pointers.
6082 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
6083
6084 // Construct the stack pointer operand.
6085 bool isPPC64 = Subtarget.isPPC64();
6086 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6087 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6088
6089 // Get the operands for the STACKRESTORE.
6090 SDValue Chain = Op.getOperand(0);
6091 SDValue SaveSP = Op.getOperand(1);
6092
6093 // Load the old link SP.
6094 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
6095 MachinePointerInfo(),
6096 false, false, false, 0);
6097
6098 // Restore the stack pointer.
6099 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6100
6101 // Store the old link SP.
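// Note: the SP link loaded above is written back through the restored stack
// pointer (the store below is chained after the CopyToReg), so the saved
// back-chain remains valid after the dynamic area is popped.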
6102 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
6103 false, false, 0);
6104 }
6105
6106 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6107 MachineFunction &MF = DAG.getMachineFunction();
6108 bool isPPC64 = Subtarget.isPPC64();
6109 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
6110
6111 // Get the current return address save index. The users of this index will
6112 // be primarily the RETURNADDR lowering.
6113 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6114 int RASI = FI->getReturnAddrSaveIndex();
6115
6116 // If the return address save index hasn't been defined yet.
6117 if (!RASI) {
6118 // Find out the fixed offset of the return address save area.
6119 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6120 // Allocate the frame index for the return address save area.
6121 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6122 // Save the result.
6123 FI->setReturnAddrSaveIndex(RASI);
6124 }
6125 return DAG.getFrameIndex(RASI, PtrVT);
6126 }
6127
6128 SDValue
6129 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6130 MachineFunction &MF = DAG.getMachineFunction();
6131 bool isPPC64 = Subtarget.isPPC64();
6132 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
6133
6134 // Get the current frame pointer save index. The users of this index will be
6135 // primarily DYNALLOC instructions.
6136 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6137 int FPSI = FI->getFramePointerSaveIndex();
6138
6139 // If the frame pointer save index hasn't been defined yet.
6140 if (!FPSI) {
6141 // Find out the fixed offset of the frame pointer save area.
6142 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6143 // Allocate the frame index for the frame pointer save area.
6144 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6145 // Save the result.
6146 FI->setFramePointerSaveIndex(FPSI);
6147 }
6148 return DAG.getFrameIndex(FPSI, PtrVT);
6149 }
6150
6151 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6152 SelectionDAG &DAG,
6153 const PPCSubtarget &Subtarget) const {
6154 // Get the inputs.
6155 SDValue Chain = Op.getOperand(0);
6156 SDValue Size = Op.getOperand(1);
6157 SDLoc dl(Op);
6158
6159 // Get the correct type for pointers.
6160 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
6161 // Negate the size.
6162 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6163 DAG.getConstant(0, dl, PtrVT), Size);
6164 // Construct a node for the frame pointer save index.
6165 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6166 // Build a DYNALLOC node.
6167 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 6168 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 6169 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); 6170 } 6171 6172 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 6173 SelectionDAG &DAG) const { 6174 SDLoc DL(Op); 6175 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 6176 DAG.getVTList(MVT::i32, MVT::Other), 6177 Op.getOperand(0), Op.getOperand(1)); 6178 } 6179 6180 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 6181 SelectionDAG &DAG) const { 6182 SDLoc DL(Op); 6183 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 6184 Op.getOperand(0), Op.getOperand(1)); 6185 } 6186 6187 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 6188 if (Op.getValueType().isVector()) 6189 return LowerVectorLoad(Op, DAG); 6190 6191 assert(Op.getValueType() == MVT::i1 && 6192 "Custom lowering only for i1 loads"); 6193 6194 // First, load 8 bits into 32 bits, then truncate to 1 bit. 6195 6196 SDLoc dl(Op); 6197 LoadSDNode *LD = cast<LoadSDNode>(Op); 6198 6199 SDValue Chain = LD->getChain(); 6200 SDValue BasePtr = LD->getBasePtr(); 6201 MachineMemOperand *MMO = LD->getMemOperand(); 6202 6203 SDValue NewLD = 6204 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, 6205 BasePtr, MVT::i8, MMO); 6206 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 6207 6208 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 6209 return DAG.getMergeValues(Ops, dl); 6210 } 6211 6212 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 6213 if (Op.getOperand(1).getValueType().isVector()) 6214 return LowerVectorStore(Op, DAG); 6215 6216 assert(Op.getOperand(1).getValueType() == MVT::i1 && 6217 "Custom lowering only for i1 stores"); 6218 6219 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 6220 6221 SDLoc dl(Op); 6222 StoreSDNode *ST = cast<StoreSDNode>(Op); 6223 6224 SDValue Chain = ST->getChain(); 6225 SDValue BasePtr = ST->getBasePtr(); 6226 SDValue Value = ST->getValue(); 6227 MachineMemOperand *MMO = ST->getMemOperand(); 6228 6229 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), 6230 Value); 6231 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 6232 } 6233 6234 // FIXME: Remove this once the ANDI glue bug is fixed: 6235 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 6236 assert(Op.getValueType() == MVT::i1 && 6237 "Custom lowering only for i1 results"); 6238 6239 SDLoc DL(Op); 6240 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 6241 Op.getOperand(0)); 6242 } 6243 6244 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 6245 /// possible. 6246 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 6247 // Not FP? Not a fsel. 6248 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 6249 !Op.getOperand(2).getValueType().isFloatingPoint()) 6250 return Op; 6251 6252 // We might be able to do better than this under some circumstances, but in 6253 // general, fsel-based lowering of select is a finite-math-only optimization. 6254 // For more information, see section F.3 of the 2.06 ISA specification. 6255 if (!DAG.getTarget().Options.NoInfsFPMath || 6256 !DAG.getTarget().Options.NoNaNsFPMath) 6257 return Op; 6258 // TODO: Propagate flags from the select rather than global settings. 
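// For reference, fsel computes FRT = (FRA >= 0.0) ? FRC : FRB, so under the
// no-NaNs/no-infs assumption checked above a floating-point select_cc can be
// phrased in terms of the sign of a difference, e.g.
//   select_cc(lhs, rhs, tv, fv, SETGE)  ->  fsel(lhs - rhs, tv, fv)
// since lhs - rhs >= 0.0 exactly when lhs >= rhs. The cases below are
// variations on this theme (swapping tv/fv, negating the operand, or chaining
// two fsels for the equality cases).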
6259 SDNodeFlags Flags; 6260 Flags.setNoInfs(true); 6261 Flags.setNoNaNs(true); 6262 6263 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 6264 6265 EVT ResVT = Op.getValueType(); 6266 EVT CmpVT = Op.getOperand(0).getValueType(); 6267 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6268 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 6269 SDLoc dl(Op); 6270 6271 // If the RHS of the comparison is a 0.0, we don't need to do the 6272 // subtraction at all. 6273 SDValue Sel1; 6274 if (isFloatingPointZero(RHS)) 6275 switch (CC) { 6276 default: break; // SETUO etc aren't handled by fsel. 6277 case ISD::SETNE: 6278 std::swap(TV, FV); 6279 case ISD::SETEQ: 6280 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6281 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6282 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6283 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6284 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6285 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6286 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 6287 case ISD::SETULT: 6288 case ISD::SETLT: 6289 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6290 case ISD::SETOGE: 6291 case ISD::SETGE: 6292 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6293 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6294 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6295 case ISD::SETUGT: 6296 case ISD::SETGT: 6297 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6298 case ISD::SETOLE: 6299 case ISD::SETLE: 6300 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6301 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6302 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6303 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 6304 } 6305 6306 SDValue Cmp; 6307 switch (CC) { 6308 default: break; // SETUO etc aren't handled by fsel. 
6309 case ISD::SETNE: 6310 std::swap(TV, FV); 6311 case ISD::SETEQ: 6312 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6313 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6314 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6315 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6316 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6317 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6318 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6319 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 6320 case ISD::SETULT: 6321 case ISD::SETLT: 6322 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6323 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6324 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6325 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6326 case ISD::SETOGE: 6327 case ISD::SETGE: 6328 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6329 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6330 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6331 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6332 case ISD::SETUGT: 6333 case ISD::SETGT: 6334 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6335 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6336 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6337 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6338 case ISD::SETOLE: 6339 case ISD::SETLE: 6340 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6341 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6342 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6343 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6344 } 6345 return Op; 6346 } 6347 6348 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 6349 SelectionDAG &DAG, 6350 SDLoc dl) const { 6351 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6352 SDValue Src = Op.getOperand(0); 6353 if (Src.getValueType() == MVT::f32) 6354 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6355 6356 SDValue Tmp; 6357 switch (Op.getSimpleValueType().SimpleTy) { 6358 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6359 case MVT::i32: 6360 Tmp = DAG.getNode( 6361 Op.getOpcode() == ISD::FP_TO_SINT 6362 ? PPCISD::FCTIWZ 6363 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6364 dl, MVT::f64, Src); 6365 break; 6366 case MVT::i64: 6367 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6368 "i64 FP_TO_UINT is supported only with FPCVT"); 6369 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 6370 PPCISD::FCTIDUZ, 6371 dl, MVT::f64, Src); 6372 break; 6373 } 6374 6375 // Convert the FP value to an int value through memory. 6376 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 6377 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 6378 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 6379 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 6380 MachinePointerInfo MPI = 6381 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); 6382 6383 // Emit a store to the stack slot. 
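// With STFIWX available, the i32 result produced by fctiwz (which lives in an
// FPR) is stored directly as a 4-byte integer; otherwise the whole f64 is
// stored and the wanted word is loaded back, with the big-endian offset
// adjustment applied below.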
6384 SDValue Chain; 6385 if (i32Stack) { 6386 MachineFunction &MF = DAG.getMachineFunction(); 6387 MachineMemOperand *MMO = 6388 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 6389 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 6390 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 6391 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 6392 } else 6393 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 6394 MPI, false, false, 0); 6395 6396 // Result is a load from the stack slot. If loading 4 bytes, make sure to 6397 // add in a bias on big endian. 6398 if (Op.getValueType() == MVT::i32 && !i32Stack) { 6399 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 6400 DAG.getConstant(4, dl, FIPtr.getValueType())); 6401 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); 6402 } 6403 6404 RLI.Chain = Chain; 6405 RLI.Ptr = FIPtr; 6406 RLI.MPI = MPI; 6407 } 6408 6409 /// \brief Custom lowers floating point to integer conversions to use 6410 /// the direct move instructions available in ISA 2.07 to avoid the 6411 /// need for load/store combinations. 6412 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, 6413 SelectionDAG &DAG, 6414 SDLoc dl) const { 6415 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6416 SDValue Src = Op.getOperand(0); 6417 6418 if (Src.getValueType() == MVT::f32) 6419 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6420 6421 SDValue Tmp; 6422 switch (Op.getSimpleValueType().SimpleTy) { 6423 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6424 case MVT::i32: 6425 Tmp = DAG.getNode( 6426 Op.getOpcode() == ISD::FP_TO_SINT 6427 ? PPCISD::FCTIWZ 6428 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6429 dl, MVT::f64, Src); 6430 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); 6431 break; 6432 case MVT::i64: 6433 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6434 "i64 FP_TO_UINT is supported only with FPCVT"); 6435 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 6436 PPCISD::FCTIDUZ, 6437 dl, MVT::f64, Src); 6438 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); 6439 break; 6440 } 6441 return Tmp; 6442 } 6443 6444 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 6445 SDLoc dl) const { 6446 if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) 6447 return LowerFP_TO_INTDirectMove(Op, DAG, dl); 6448 6449 ReuseLoadInfo RLI; 6450 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6451 6452 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, 6453 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, 6454 RLI.Ranges); 6455 } 6456 6457 // We're trying to insert a regular store, S, and then a load, L. If the 6458 // incoming value, O, is a load, we might just be able to have our load use the 6459 // address used by O. However, we don't know if anything else will store to 6460 // that address before we can load from it. To prevent this situation, we need 6461 // to insert our load, L, into the chain as a peer of O. To do this, we give L 6462 // the same chain operand as O, we create a token factor from the chain results 6463 // of O and L, and we replace all uses of O's chain result with that token 6464 // factor (see spliceIntoChain below for this last part). 
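// Schematically, if O's chain result originally fed users U1..Un, then after
// the splice those users read TF(O.chain, L.chain) instead, while L keeps O's
// input chain; anything previously ordered after O is now also ordered after
// L, so no intervening store can clobber the reused address.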
6465 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 6466 ReuseLoadInfo &RLI, 6467 SelectionDAG &DAG, 6468 ISD::LoadExtType ET) const { 6469 SDLoc dl(Op); 6470 if (ET == ISD::NON_EXTLOAD && 6471 (Op.getOpcode() == ISD::FP_TO_UINT || 6472 Op.getOpcode() == ISD::FP_TO_SINT) && 6473 isOperationLegalOrCustom(Op.getOpcode(), 6474 Op.getOperand(0).getValueType())) { 6475 6476 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6477 return true; 6478 } 6479 6480 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 6481 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 6482 LD->isNonTemporal()) 6483 return false; 6484 if (LD->getMemoryVT() != MemVT) 6485 return false; 6486 6487 RLI.Ptr = LD->getBasePtr(); 6488 if (LD->isIndexed() && !LD->getOffset().isUndef()) { 6489 assert(LD->getAddressingMode() == ISD::PRE_INC && 6490 "Non-pre-inc AM on PPC?"); 6491 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 6492 LD->getOffset()); 6493 } 6494 6495 RLI.Chain = LD->getChain(); 6496 RLI.MPI = LD->getPointerInfo(); 6497 RLI.IsInvariant = LD->isInvariant(); 6498 RLI.Alignment = LD->getAlignment(); 6499 RLI.AAInfo = LD->getAAInfo(); 6500 RLI.Ranges = LD->getRanges(); 6501 6502 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 6503 return true; 6504 } 6505 6506 // Given the head of the old chain, ResChain, insert a token factor containing 6507 // it and NewResChain, and make users of ResChain now be users of that token 6508 // factor. 6509 void PPCTargetLowering::spliceIntoChain(SDValue ResChain, 6510 SDValue NewResChain, 6511 SelectionDAG &DAG) const { 6512 if (!ResChain) 6513 return; 6514 6515 SDLoc dl(NewResChain); 6516 6517 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 6518 NewResChain, DAG.getUNDEF(MVT::Other)); 6519 assert(TF.getNode() != NewResChain.getNode() && 6520 "A new TF really is required here"); 6521 6522 DAG.ReplaceAllUsesOfValueWith(ResChain, TF); 6523 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); 6524 } 6525 6526 /// \brief Analyze profitability of direct move 6527 /// prefer float load to int load plus direct move 6528 /// when there is no integer use of int load 6529 static bool directMoveIsProfitable(const SDValue &Op) { 6530 SDNode *Origin = Op.getOperand(0).getNode(); 6531 if (Origin->getOpcode() != ISD::LOAD) 6532 return true; 6533 6534 for (SDNode::use_iterator UI = Origin->use_begin(), 6535 UE = Origin->use_end(); 6536 UI != UE; ++UI) { 6537 6538 // Only look at the users of the loaded value. 6539 if (UI.getUse().get().getResNo() != 0) 6540 continue; 6541 6542 if (UI->getOpcode() != ISD::SINT_TO_FP && 6543 UI->getOpcode() != ISD::UINT_TO_FP) 6544 return true; 6545 } 6546 6547 return false; 6548 } 6549 6550 /// \brief Custom lowers integer to floating point conversions to use 6551 /// the direct move instructions available in ISA 2.07 to avoid the 6552 /// need for load/store combinations. 6553 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, 6554 SelectionDAG &DAG, 6555 SDLoc dl) const { 6556 assert((Op.getValueType() == MVT::f32 || 6557 Op.getValueType() == MVT::f64) && 6558 "Invalid floating point type as target of conversion"); 6559 assert(Subtarget.hasFPCVT() && 6560 "Int to FP conversions with direct moves require FPCVT"); 6561 SDValue FP; 6562 SDValue Src = Op.getOperand(0); 6563 bool SinglePrec = Op.getValueType() == MVT::f32; 6564 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; 6565 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; 6566 unsigned ConvOp = Signed ? (SinglePrec ? 
PPCISD::FCFIDS : PPCISD::FCFID) : 6567 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); 6568 6569 if (WordInt) { 6570 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, 6571 dl, MVT::f64, Src); 6572 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6573 } 6574 else { 6575 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); 6576 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6577 } 6578 6579 return FP; 6580 } 6581 6582 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 6583 SelectionDAG &DAG) const { 6584 SDLoc dl(Op); 6585 6586 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { 6587 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) 6588 return SDValue(); 6589 6590 SDValue Value = Op.getOperand(0); 6591 // The values are now known to be -1 (false) or 1 (true). To convert this 6592 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 6593 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 6594 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 6595 6596 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 6597 6598 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 6599 6600 if (Op.getValueType() != MVT::v4f64) 6601 Value = DAG.getNode(ISD::FP_ROUND, dl, 6602 Op.getValueType(), Value, 6603 DAG.getIntPtrConstant(1, dl)); 6604 return Value; 6605 } 6606 6607 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 6608 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 6609 return SDValue(); 6610 6611 if (Op.getOperand(0).getValueType() == MVT::i1) 6612 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 6613 DAG.getConstantFP(1.0, dl, Op.getValueType()), 6614 DAG.getConstantFP(0.0, dl, Op.getValueType())); 6615 6616 // If we have direct moves, we can do all the conversion, skip the store/load 6617 // however, without FPCVT we can't do most conversions. 6618 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && 6619 Subtarget.isPPC64() && Subtarget.hasFPCVT()) 6620 return LowerINT_TO_FPDirectMove(Op, DAG, dl); 6621 6622 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 6623 "UINT_TO_FP is supported only with FPCVT"); 6624 6625 // If we have FCFIDS, then use it when converting to single-precision. 6626 // Otherwise, convert to double-precision and then round. 6627 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6628 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 6629 : PPCISD::FCFIDS) 6630 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 6631 : PPCISD::FCFID); 6632 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6633 ? MVT::f32 6634 : MVT::f64; 6635 6636 if (Op.getOperand(0).getValueType() == MVT::i64) { 6637 SDValue SINT = Op.getOperand(0); 6638 // When converting to single-precision, we actually need to convert 6639 // to double-precision first and then round to single-precision. 6640 // To avoid double-rounding effects during that operation, we have 6641 // to prepare the input operand. Bits that might be truncated when 6642 // converting to double-precision are replaced by a bit that won't 6643 // be lost at this stage, but is below the single-precision rounding 6644 // position. 6645 // 6646 // However, if -enable-unsafe-fp-math is in effect, accept double 6647 // rounding to avoid the extra overhead. 
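// The preparation below clears the low 11 bits of the input and, if any of
// them were set, sets bit 11 in their place; e.g. an input ending in ...CDEF
// becomes ...C800, while a value whose low 11 bits are already zero passes
// through unchanged.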
6648 if (Op.getValueType() == MVT::f32 && 6649 !Subtarget.hasFPCVT() && 6650 !DAG.getTarget().Options.UnsafeFPMath) { 6651 6652 // Twiddle input to make sure the low 11 bits are zero. (If this 6653 // is the case, we are guaranteed the value will fit into the 53 bit 6654 // mantissa of an IEEE double-precision value without rounding.) 6655 // If any of those low 11 bits were not zero originally, make sure 6656 // bit 12 (value 2048) is set instead, so that the final rounding 6657 // to single-precision gets the correct result. 6658 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6659 SINT, DAG.getConstant(2047, dl, MVT::i64)); 6660 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 6661 Round, DAG.getConstant(2047, dl, MVT::i64)); 6662 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 6663 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6664 Round, DAG.getConstant(-2048, dl, MVT::i64)); 6665 6666 // However, we cannot use that value unconditionally: if the magnitude 6667 // of the input value is small, the bit-twiddling we did above might 6668 // end up visibly changing the output. Fortunately, in that case, we 6669 // don't need to twiddle bits since the original input will convert 6670 // exactly to double-precision floating-point already. Therefore, 6671 // construct a conditional to use the original value if the top 11 6672 // bits are all sign-bit copies, and use the rounded value computed 6673 // above otherwise. 6674 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 6675 SINT, DAG.getConstant(53, dl, MVT::i32)); 6676 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 6677 Cond, DAG.getConstant(1, dl, MVT::i64)); 6678 Cond = DAG.getSetCC(dl, MVT::i32, 6679 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); 6680 6681 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 6682 } 6683 6684 ReuseLoadInfo RLI; 6685 SDValue Bits; 6686 6687 MachineFunction &MF = DAG.getMachineFunction(); 6688 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 6689 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, 6690 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, 6691 RLI.Ranges); 6692 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6693 } else if (Subtarget.hasLFIWAX() && 6694 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 6695 MachineMemOperand *MMO = 6696 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6697 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6698 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6699 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 6700 DAG.getVTList(MVT::f64, MVT::Other), 6701 Ops, MVT::i32, MMO); 6702 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6703 } else if (Subtarget.hasFPCVT() && 6704 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 6705 MachineMemOperand *MMO = 6706 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6707 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6708 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6709 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 6710 DAG.getVTList(MVT::f64, MVT::Other), 6711 Ops, MVT::i32, MMO); 6712 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6713 } else if (((Subtarget.hasLFIWAX() && 6714 SINT.getOpcode() == ISD::SIGN_EXTEND) || 6715 (Subtarget.hasFPCVT() && 6716 SINT.getOpcode() == ISD::ZERO_EXTEND)) && 6717 SINT.getOperand(0).getValueType() == MVT::i32) { 6718 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 6719 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 6720 6721 int FrameIdx = 
FrameInfo->CreateStackObject(4, 4, false); 6722 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6723 6724 SDValue Store = DAG.getStore( 6725 DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 6726 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), 6727 false, false, 0); 6728 6729 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6730 "Expected an i32 store"); 6731 6732 RLI.Ptr = FIdx; 6733 RLI.Chain = Store; 6734 RLI.MPI = 6735 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6736 RLI.Alignment = 4; 6737 6738 MachineMemOperand *MMO = 6739 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6740 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6741 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6742 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 6743 PPCISD::LFIWZX : PPCISD::LFIWAX, 6744 dl, DAG.getVTList(MVT::f64, MVT::Other), 6745 Ops, MVT::i32, MMO); 6746 } else 6747 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 6748 6749 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 6750 6751 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6752 FP = DAG.getNode(ISD::FP_ROUND, dl, 6753 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 6754 return FP; 6755 } 6756 6757 assert(Op.getOperand(0).getValueType() == MVT::i32 && 6758 "Unhandled INT_TO_FP type in custom expander!"); 6759 // Since we only generate this in 64-bit mode, we can take advantage of 6760 // 64-bit registers. In particular, sign extend the input value into the 6761 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 6762 // then lfd it and fcfid it. 6763 MachineFunction &MF = DAG.getMachineFunction(); 6764 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 6765 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); 6766 6767 SDValue Ld; 6768 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 6769 ReuseLoadInfo RLI; 6770 bool ReusingLoad; 6771 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, 6772 DAG))) { 6773 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 6774 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6775 6776 SDValue Store = DAG.getStore( 6777 DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 6778 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), 6779 false, false, 0); 6780 6781 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6782 "Expected an i32 store"); 6783 6784 RLI.Ptr = FIdx; 6785 RLI.Chain = Store; 6786 RLI.MPI = 6787 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6788 RLI.Alignment = 4; 6789 } 6790 6791 MachineMemOperand *MMO = 6792 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6793 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6794 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6795 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 6796 PPCISD::LFIWZX : PPCISD::LFIWAX, 6797 dl, DAG.getVTList(MVT::f64, MVT::Other), 6798 Ops, MVT::i32, MMO); 6799 if (ReusingLoad) 6800 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); 6801 } else { 6802 assert(Subtarget.isPPC64() && 6803 "i32->FP without LFIWAX supported only on PPC64"); 6804 6805 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 6806 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6807 6808 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 6809 Op.getOperand(0)); 6810 6811 // STD the extended value into the stack slot. 
6812 SDValue Store = DAG.getStore(
6813 DAG.getEntryNode(), dl, Ext64, FIdx,
6814 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
6815 false, false, 0);
6816
6817 // Load the value as a double.
6818 Ld = DAG.getLoad(
6819 MVT::f64, dl, Store, FIdx,
6820 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
6821 false, false, false, 0);
6822 }
6823
6824 // FCFID it and return it.
6825 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
6826 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6827 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
6828 DAG.getIntPtrConstant(0, dl));
6829 return FP;
6830 }
6831
6832 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
6833 SelectionDAG &DAG) const {
6834 SDLoc dl(Op);
6835 /*
6836 The rounding mode is in bits 30:31 of FPSCR, and has the following
6837 settings:
6838 00 Round to nearest
6839 01 Round to 0
6840 10 Round to +inf
6841 11 Round to -inf
6842
6843 FLT_ROUNDS, on the other hand, expects the following:
6844 -1 Undefined
6845 0 Round to 0
6846 1 Round to nearest
6847 2 Round to +inf
6848 3 Round to -inf
6849
6850 To perform the conversion, we do:
6851 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
6852 */
6853
6854 MachineFunction &MF = DAG.getMachineFunction();
6855 EVT VT = Op.getValueType();
6856 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
6857
6858 // Save FP Control Word to register
6859 EVT NodeTys[] = {
6860 MVT::f64, // return register
6861 MVT::Glue // unused in this context
6862 };
6863 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
6864
6865 // Save FP register to stack slot
6866 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
6867 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
6868 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
6869 StackSlot, MachinePointerInfo(), false, false, 0);
6870
6871 // Load FP Control Word from low 32 bits of stack slot.
6872 SDValue Four = DAG.getConstant(4, dl, PtrVT);
6873 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
6874 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
6875 false, false, false, 0);
6876
6877 // Transform as necessary
6878 SDValue CWD1 =
6879 DAG.getNode(ISD::AND, dl, MVT::i32,
6880 CWD, DAG.getConstant(3, dl, MVT::i32));
6881 SDValue CWD2 =
6882 DAG.getNode(ISD::SRL, dl, MVT::i32,
6883 DAG.getNode(ISD::AND, dl, MVT::i32,
6884 DAG.getNode(ISD::XOR, dl, MVT::i32,
6885 CWD, DAG.getConstant(3, dl, MVT::i32)),
6886 DAG.getConstant(3, dl, MVT::i32)),
6887 DAG.getConstant(1, dl, MVT::i32));
6888
6889 SDValue RetVal =
6890 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
6891
6892 return DAG.getNode((VT.getSizeInBits() < 16 ?
6893 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
6894 }
6895
6896 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6897 EVT VT = Op.getValueType();
6898 unsigned BitWidth = VT.getSizeInBits();
6899 SDLoc dl(Op);
6900 assert(Op.getNumOperands() == 3 &&
6901 VT == Op.getOperand(1).getValueType() &&
6902 "Unexpected SHL!");
6903
6904 // Expand into a bunch of logical ops. Note that these ops
6905 // depend on the PPC behavior for oversized shift amounts.
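// Concretely, for 32-bit parts the code below computes
//   OutLo = Lo << Amt
//   OutHi = (Hi << Amt) | (Lo >> (32 - Amt)) | (Lo << (Amt - 32))
// where the PPC shifts use six bits of the amount and return 0 for amounts
// 32..63. For Amt = 4 only the first two OutHi terms survive; for Amt = 40
// only the last one does (OutHi = Lo << 8, OutLo = 0).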
6906 SDValue Lo = Op.getOperand(0); 6907 SDValue Hi = Op.getOperand(1); 6908 SDValue Amt = Op.getOperand(2); 6909 EVT AmtVT = Amt.getValueType(); 6910 6911 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6912 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6913 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 6914 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 6915 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 6916 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6917 DAG.getConstant(-BitWidth, dl, AmtVT)); 6918 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 6919 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 6920 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 6921 SDValue OutOps[] = { OutLo, OutHi }; 6922 return DAG.getMergeValues(OutOps, dl); 6923 } 6924 6925 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 6926 EVT VT = Op.getValueType(); 6927 SDLoc dl(Op); 6928 unsigned BitWidth = VT.getSizeInBits(); 6929 assert(Op.getNumOperands() == 3 && 6930 VT == Op.getOperand(1).getValueType() && 6931 "Unexpected SRL!"); 6932 6933 // Expand into a bunch of logical ops. Note that these ops 6934 // depend on the PPC behavior for oversized shift amounts. 6935 SDValue Lo = Op.getOperand(0); 6936 SDValue Hi = Op.getOperand(1); 6937 SDValue Amt = Op.getOperand(2); 6938 EVT AmtVT = Amt.getValueType(); 6939 6940 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6941 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6942 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 6943 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 6944 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 6945 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6946 DAG.getConstant(-BitWidth, dl, AmtVT)); 6947 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 6948 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 6949 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 6950 SDValue OutOps[] = { OutLo, OutHi }; 6951 return DAG.getMergeValues(OutOps, dl); 6952 } 6953 6954 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 6955 SDLoc dl(Op); 6956 EVT VT = Op.getValueType(); 6957 unsigned BitWidth = VT.getSizeInBits(); 6958 assert(Op.getNumOperands() == 3 && 6959 VT == Op.getOperand(1).getValueType() && 6960 "Unexpected SRA!"); 6961 6962 // Expand into a bunch of logical ops, followed by a select_cc. 6963 SDValue Lo = Op.getOperand(0); 6964 SDValue Hi = Op.getOperand(1); 6965 SDValue Amt = Op.getOperand(2); 6966 EVT AmtVT = Amt.getValueType(); 6967 6968 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6969 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6970 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 6971 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 6972 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 6973 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6974 DAG.getConstant(-BitWidth, dl, AmtVT)); 6975 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 6976 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 6977 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), 6978 Tmp4, Tmp6, ISD::SETLE); 6979 SDValue OutOps[] = { OutLo, OutHi }; 6980 return DAG.getMergeValues(OutOps, dl); 6981 } 6982 6983 //===----------------------------------------------------------------------===// 6984 // Vector related lowering. 
6985 // 6986 6987 /// BuildSplatI - Build a canonical splati of Val with an element size of 6988 /// SplatSize. Cast the result to VT. 6989 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 6990 SelectionDAG &DAG, SDLoc dl) { 6991 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 6992 6993 static const MVT VTys[] = { // canonical VT to use for each size. 6994 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 6995 }; 6996 6997 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 6998 6999 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 7000 if (Val == -1) 7001 SplatSize = 1; 7002 7003 EVT CanonicalVT = VTys[SplatSize-1]; 7004 7005 // Build a canonical splat for this value. 7006 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); 7007 } 7008 7009 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 7010 /// specified intrinsic ID. 7011 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, 7012 SelectionDAG &DAG, SDLoc dl, 7013 EVT DestVT = MVT::Other) { 7014 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 7015 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7016 DAG.getConstant(IID, dl, MVT::i32), Op); 7017 } 7018 7019 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 7020 /// specified intrinsic ID. 7021 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 7022 SelectionDAG &DAG, SDLoc dl, 7023 EVT DestVT = MVT::Other) { 7024 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 7025 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7026 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); 7027 } 7028 7029 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 7030 /// specified intrinsic ID. 7031 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 7032 SDValue Op2, SelectionDAG &DAG, 7033 SDLoc dl, EVT DestVT = MVT::Other) { 7034 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 7035 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7036 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); 7037 } 7038 7039 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 7040 /// amount. The result has the specified value type. 7041 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 7042 EVT VT, SelectionDAG &DAG, SDLoc dl) { 7043 // Force LHS/RHS to be the right type. 7044 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 7045 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 7046 7047 int Ops[16]; 7048 for (unsigned i = 0; i != 16; ++i) 7049 Ops[i] = i + Amt; 7050 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 7051 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7052 } 7053 7054 // If this is a case we can't handle, return null and let the default 7055 // expansion code take care of it. If we CAN select this case, and if it 7056 // selects to a single instruction, return Op. Otherwise, if we can codegen 7057 // this case more efficiently than a constant pool load, lower it to the 7058 // sequence of ops that should be used. 
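// For example, a splat of 5 across v8i16 selects to a single vspltish 5 (the
// splat-immediate range is [-16,15]), while a splat of 20 becomes a
// VADD_SPLAT pseudo that is later expanded to vspltish 10 plus an add of the
// result to itself, per the two-instruction cases handled below.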
7059 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 7060 SelectionDAG &DAG) const { 7061 SDLoc dl(Op); 7062 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 7063 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 7064 7065 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { 7066 // We first build an i32 vector, load it into a QPX register, 7067 // then convert it to a floating-point vector and compare it 7068 // to a zero vector to get the boolean result. 7069 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 7070 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 7071 MachinePointerInfo PtrInfo = 7072 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7073 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7074 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7075 7076 assert(BVN->getNumOperands() == 4 && 7077 "BUILD_VECTOR for v4i1 does not have 4 operands"); 7078 7079 bool IsConst = true; 7080 for (unsigned i = 0; i < 4; ++i) { 7081 if (BVN->getOperand(i).isUndef()) continue; 7082 if (!isa<ConstantSDNode>(BVN->getOperand(i))) { 7083 IsConst = false; 7084 break; 7085 } 7086 } 7087 7088 if (IsConst) { 7089 Constant *One = 7090 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); 7091 Constant *NegOne = 7092 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); 7093 7094 SmallVector<Constant*, 4> CV(4, NegOne); 7095 for (unsigned i = 0; i < 4; ++i) { 7096 if (BVN->getOperand(i).isUndef()) 7097 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); 7098 else if (isNullConstant(BVN->getOperand(i))) 7099 continue; 7100 else 7101 CV[i] = One; 7102 } 7103 7104 Constant *CP = ConstantVector::get(CV); 7105 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), 7106 16 /* alignment */); 7107 7108 SmallVector<SDValue, 2> Ops; 7109 Ops.push_back(DAG.getEntryNode()); 7110 Ops.push_back(CPIdx); 7111 7112 SmallVector<EVT, 2> ValueVTs; 7113 ValueVTs.push_back(MVT::v4i1); 7114 ValueVTs.push_back(MVT::Other); // chain 7115 SDVTList VTs = DAG.getVTList(ValueVTs); 7116 7117 return DAG.getMemIntrinsicNode( 7118 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, 7119 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 7120 } 7121 7122 SmallVector<SDValue, 4> Stores; 7123 for (unsigned i = 0; i < 4; ++i) { 7124 if (BVN->getOperand(i).isUndef()) continue; 7125 7126 unsigned Offset = 4*i; 7127 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 7128 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 7129 7130 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); 7131 if (StoreSize > 4) { 7132 Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, 7133 BVN->getOperand(i), Idx, 7134 PtrInfo.getWithOffset(Offset), 7135 MVT::i32, false, false, 0)); 7136 } else { 7137 SDValue StoreValue = BVN->getOperand(i); 7138 if (StoreSize < 4) 7139 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); 7140 7141 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, 7142 StoreValue, Idx, 7143 PtrInfo.getWithOffset(Offset), 7144 false, false, 0)); 7145 } 7146 } 7147 7148 SDValue StoreChain; 7149 if (!Stores.empty()) 7150 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 7151 else 7152 StoreChain = DAG.getEntryNode(); 7153 7154 // Now load from v4i32 into the QPX register; this will extend it to 7155 // v4i64 but not yet convert it to a floating point. 
Nevertheless, this 7156 // is typed as v4f64 because the QPX register integer states are not 7157 // explicitly represented. 7158 7159 SmallVector<SDValue, 2> Ops; 7160 Ops.push_back(StoreChain); 7161 Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32)); 7162 Ops.push_back(FIdx); 7163 7164 SmallVector<EVT, 2> ValueVTs; 7165 ValueVTs.push_back(MVT::v4f64); 7166 ValueVTs.push_back(MVT::Other); // chain 7167 SDVTList VTs = DAG.getVTList(ValueVTs); 7168 7169 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, 7170 dl, VTs, Ops, MVT::v4i32, PtrInfo); 7171 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7172 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), 7173 LoadedVect); 7174 7175 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); 7176 7177 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); 7178 } 7179 7180 // All other QPX vectors are handled by generic code. 7181 if (Subtarget.hasQPX()) 7182 return SDValue(); 7183 7184 // Check if this is a splat of a constant value. 7185 APInt APSplatBits, APSplatUndef; 7186 unsigned SplatBitSize; 7187 bool HasAnyUndefs; 7188 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 7189 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || 7190 SplatBitSize > 32) 7191 return SDValue(); 7192 7193 unsigned SplatBits = APSplatBits.getZExtValue(); 7194 unsigned SplatUndef = APSplatUndef.getZExtValue(); 7195 unsigned SplatSize = SplatBitSize / 8; 7196 7197 // First, handle single instruction cases. 7198 7199 // All zeros? 7200 if (SplatBits == 0) { 7201 // Canonicalize all zero vectors to be v4i32. 7202 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 7203 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); 7204 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 7205 } 7206 return Op; 7207 } 7208 7209 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 7210 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 7211 (32-SplatBitSize)); 7212 if (SextVal >= -16 && SextVal <= 15) 7213 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 7214 7215 // Two instruction sequences. 7216 7217 // If this value is in the range [-32,30] and is even, use: 7218 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 7219 // If this value is in the range [17,31] and is odd, use: 7220 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 7221 // If this value is in the range [-31,-17] and is odd, use: 7222 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 7223 // Note the last two are three-instruction sequences. 7224 if (SextVal >= -32 && SextVal <= 31) { 7225 // To avoid having these optimizations undone by constant folding, 7226 // we convert to a pseudo that will be expanded later into one of 7227 // the above forms. 7228 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); 7229 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 7230 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); 7231 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); 7232 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 7233 if (VT == Op.getValueType()) 7234 return RetVal; 7235 else 7236 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 7237 } 7238 7239 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 7240 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 7241 // for fneg/fabs. 
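// The trick: vspltisw -1 puts 0xFFFFFFFF in every word, and vslw of that
// vector by itself shifts each word left by 31 (only the low 5 bits of each
// shift count matter), yielding 0x8000_0000 per word; XOR-ing with the
// all-ones vector then flips it to 0x7FFF_FFFF.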
7242 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 7243 // Make -1 and vspltisw -1: 7244 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 7245 7246 // Make the VSLW intrinsic, computing 0x8000_0000. 7247 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 7248 OnesV, DAG, dl); 7249 7250 // xor by OnesV to invert it. 7251 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 7252 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7253 } 7254 7255 // Check to see if this is a wide variety of vsplti*, binop self cases. 7256 static const signed char SplatCsts[] = { 7257 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 7258 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 7259 }; 7260 7261 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 7262 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 7263 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 7264 int i = SplatCsts[idx]; 7265 7266 // Figure out what shift amount will be used by altivec if shifted by i in 7267 // this splat size. 7268 unsigned TypeShiftAmt = i & (SplatBitSize-1); 7269 7270 // vsplti + shl self. 7271 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 7272 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7273 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7274 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 7275 Intrinsic::ppc_altivec_vslw 7276 }; 7277 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7278 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7279 } 7280 7281 // vsplti + srl self. 7282 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 7283 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7284 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7285 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 7286 Intrinsic::ppc_altivec_vsrw 7287 }; 7288 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7289 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7290 } 7291 7292 // vsplti + sra self. 7293 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 7294 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7295 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7296 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 7297 Intrinsic::ppc_altivec_vsraw 7298 }; 7299 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7300 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7301 } 7302 7303 // vsplti + rol self. 7304 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 7305 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 7306 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7307 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7308 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 7309 Intrinsic::ppc_altivec_vrlw 7310 }; 7311 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7312 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7313 } 7314 7315 // t = vsplti c, result = vsldoi t, t, 1 7316 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 7317 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7318 unsigned Amt = Subtarget.isLittleEndian() ? 
15 : 1; 7319 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7320 } 7321 // t = vsplti c, result = vsldoi t, t, 2 7322 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 7323 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7324 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; 7325 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7326 } 7327 // t = vsplti c, result = vsldoi t, t, 3 7328 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 7329 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7330 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; 7331 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7332 } 7333 } 7334 7335 return SDValue(); 7336 } 7337 7338 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 7339 /// the specified operations to build the shuffle. 7340 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 7341 SDValue RHS, SelectionDAG &DAG, 7342 SDLoc dl) { 7343 unsigned OpNum = (PFEntry >> 26) & 0x0F; 7344 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 7345 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 7346 7347 enum { 7348 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 7349 OP_VMRGHW, 7350 OP_VMRGLW, 7351 OP_VSPLTISW0, 7352 OP_VSPLTISW1, 7353 OP_VSPLTISW2, 7354 OP_VSPLTISW3, 7355 OP_VSLDOI4, 7356 OP_VSLDOI8, 7357 OP_VSLDOI12 7358 }; 7359 7360 if (OpNum == OP_COPY) { 7361 if (LHSID == (1*9+2)*9+3) return LHS; 7362 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 7363 return RHS; 7364 } 7365 7366 SDValue OpLHS, OpRHS; 7367 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 7368 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 7369 7370 int ShufIdxs[16]; 7371 switch (OpNum) { 7372 default: llvm_unreachable("Unknown i32 permute!"); 7373 case OP_VMRGHW: 7374 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 7375 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 7376 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 7377 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 7378 break; 7379 case OP_VMRGLW: 7380 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 7381 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 7382 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 7383 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 7384 break; 7385 case OP_VSPLTISW0: 7386 for (unsigned i = 0; i != 16; ++i) 7387 ShufIdxs[i] = (i&3)+0; 7388 break; 7389 case OP_VSPLTISW1: 7390 for (unsigned i = 0; i != 16; ++i) 7391 ShufIdxs[i] = (i&3)+4; 7392 break; 7393 case OP_VSPLTISW2: 7394 for (unsigned i = 0; i != 16; ++i) 7395 ShufIdxs[i] = (i&3)+8; 7396 break; 7397 case OP_VSPLTISW3: 7398 for (unsigned i = 0; i != 16; ++i) 7399 ShufIdxs[i] = (i&3)+12; 7400 break; 7401 case OP_VSLDOI4: 7402 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 7403 case OP_VSLDOI8: 7404 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 7405 case OP_VSLDOI12: 7406 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 7407 } 7408 EVT VT = OpLHS.getValueType(); 7409 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 7410 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 7411 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, 
ShufIdxs); 7412 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7413 } 7414 7415 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 7416 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 7417 /// return the code it can be lowered into. Worst case, it can always be 7418 /// lowered into a vperm. 7419 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 7420 SelectionDAG &DAG) const { 7421 SDLoc dl(Op); 7422 SDValue V1 = Op.getOperand(0); 7423 SDValue V2 = Op.getOperand(1); 7424 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 7425 EVT VT = Op.getValueType(); 7426 bool isLittleEndian = Subtarget.isLittleEndian(); 7427 7428 if (Subtarget.hasQPX()) { 7429 if (VT.getVectorNumElements() != 4) 7430 return SDValue(); 7431 7432 if (V2.isUndef()) V2 = V1; 7433 7434 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); 7435 if (AlignIdx != -1) { 7436 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, 7437 DAG.getConstant(AlignIdx, dl, MVT::i32)); 7438 } else if (SVOp->isSplat()) { 7439 int SplatIdx = SVOp->getSplatIndex(); 7440 if (SplatIdx >= 4) { 7441 std::swap(V1, V2); 7442 SplatIdx -= 4; 7443 } 7444 7445 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, 7446 DAG.getConstant(SplatIdx, dl, MVT::i32)); 7447 } 7448 7449 // Lower this into a qvgpci/qvfperm pair. 7450 7451 // Compute the qvgpci literal 7452 unsigned idx = 0; 7453 for (unsigned i = 0; i < 4; ++i) { 7454 int m = SVOp->getMaskElt(i); 7455 unsigned mm = m >= 0 ? (unsigned) m : i; 7456 idx |= mm << (3-i)*3; 7457 } 7458 7459 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, 7460 DAG.getConstant(idx, dl, MVT::i32)); 7461 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); 7462 } 7463 7464 // Cases that are handled by instructions that take permute immediates 7465 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 7466 // selected by the instruction selector. 7467 if (V2.isUndef()) { 7468 if (PPC::isSplatShuffleMask(SVOp, 1) || 7469 PPC::isSplatShuffleMask(SVOp, 2) || 7470 PPC::isSplatShuffleMask(SVOp, 4) || 7471 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 7472 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 7473 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 7474 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 7475 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 7476 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 7477 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 7478 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 7479 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || 7480 (Subtarget.hasP8Altivec() && ( 7481 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || 7482 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || 7483 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { 7484 return Op; 7485 } 7486 } 7487 7488 // Altivec has a variety of "shuffle immediates" that take two vector inputs 7489 // and produce a fixed permutation. If any of these match, do not lower to 7490 // VPERM. 7491 unsigned int ShuffleKind = isLittleEndian ? 
2 : 0; 7492 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 7493 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 7494 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 7495 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7496 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7497 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7498 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7499 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7500 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7501 (Subtarget.hasP8Altivec() && ( 7502 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || 7503 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || 7504 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) 7505 return Op; 7506 7507 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 7508 // perfect shuffle table to emit an optimal matching sequence. 7509 ArrayRef<int> PermMask = SVOp->getMask(); 7510 7511 unsigned PFIndexes[4]; 7512 bool isFourElementShuffle = true; 7513 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 7514 unsigned EltNo = 8; // Start out undef. 7515 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 7516 if (PermMask[i*4+j] < 0) 7517 continue; // Undef, ignore it. 7518 7519 unsigned ByteSource = PermMask[i*4+j]; 7520 if ((ByteSource & 3) != j) { 7521 isFourElementShuffle = false; 7522 break; 7523 } 7524 7525 if (EltNo == 8) { 7526 EltNo = ByteSource/4; 7527 } else if (EltNo != ByteSource/4) { 7528 isFourElementShuffle = false; 7529 break; 7530 } 7531 } 7532 PFIndexes[i] = EltNo; 7533 } 7534 7535 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 7536 // perfect shuffle vector to determine if it is cost effective to do this as 7537 // discrete instructions, or whether we should use a vperm. 7538 // For now, we skip this for little endian until such time as we have a 7539 // little-endian perfect shuffle table. 7540 if (isFourElementShuffle && !isLittleEndian) { 7541 // Compute the index in the perfect shuffle table. 7542 unsigned PFTableIndex = 7543 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 7544 7545 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 7546 unsigned Cost = (PFEntry >> 30); 7547 7548 // Determining when to avoid vperm is tricky. Many things affect the cost 7549 // of vperm, particularly how many times the perm mask needs to be computed. 7550 // For example, if the perm mask can be hoisted out of a loop or is already 7551 // used (perhaps because there are multiple permutes with the same shuffle 7552 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 7553 // the loop requires an extra register. 7554 // 7555 // As a compromise, we only emit discrete instructions if the shuffle can be 7556 // generated in 3 or fewer operations. When we have loop information 7557 // available, if this block is within a loop, we should avoid using vperm 7558 // for 3-operation perms and use a constant pool load instead. 7559 if (Cost < 3) 7560 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 7561 } 7562 7563 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 7564 // vector that will get spilled to the constant pool. 7565 if (V2.isUndef()) V2 = V1; 7566 7567 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 7568 // that it is in input element units, not in bytes. Convert now. 
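  // For example, with v4i32 inputs (BytesPerElement == 4), mask element 2
  // expands to byte indices 8, 9, 10 and 11 on a big-endian target.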
7569 7570 // For little endian, the order of the input vectors is reversed, and 7571 // the permutation mask is complemented with respect to 31. This is 7572 // necessary to produce proper semantics with the big-endian-biased vperm 7573 // instruction. 7574 EVT EltVT = V1.getValueType().getVectorElementType(); 7575 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 7576 7577 SmallVector<SDValue, 16> ResultMask; 7578 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 7579 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 7580 7581 for (unsigned j = 0; j != BytesPerElement; ++j) 7582 if (isLittleEndian) 7583 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), 7584 dl, MVT::i32)); 7585 else 7586 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, 7587 MVT::i32)); 7588 } 7589 7590 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 7591 ResultMask); 7592 if (isLittleEndian) 7593 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7594 V2, V1, VPermMask); 7595 else 7596 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7597 V1, V2, VPermMask); 7598 } 7599 7600 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a 7601 /// vector comparison. If it is, return true and fill in Opc/isDot with 7602 /// information about the intrinsic. 7603 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, 7604 bool &isDot, const PPCSubtarget &Subtarget) { 7605 unsigned IntrinsicID = 7606 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 7607 CompareOpc = -1; 7608 isDot = false; 7609 switch (IntrinsicID) { 7610 default: return false; 7611 // Comparison predicates. 7612 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 7613 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 7614 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 7615 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 7616 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 7617 case Intrinsic::ppc_altivec_vcmpequd_p: 7618 if (Subtarget.hasP8Altivec()) { 7619 CompareOpc = 199; 7620 isDot = 1; 7621 } else 7622 return false; 7623 7624 break; 7625 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 7626 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 7627 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 7628 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 7629 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 7630 case Intrinsic::ppc_altivec_vcmpgtsd_p: 7631 if (Subtarget.hasP8Altivec()) { 7632 CompareOpc = 967; 7633 isDot = 1; 7634 } else 7635 return false; 7636 7637 break; 7638 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 7639 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 7640 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 7641 case Intrinsic::ppc_altivec_vcmpgtud_p: 7642 if (Subtarget.hasP8Altivec()) { 7643 CompareOpc = 711; 7644 isDot = 1; 7645 } else 7646 return false; 7647 7648 break; 7649 // VSX predicate comparisons use the same infrastructure 7650 case Intrinsic::ppc_vsx_xvcmpeqdp_p: 7651 case Intrinsic::ppc_vsx_xvcmpgedp_p: 7652 case Intrinsic::ppc_vsx_xvcmpgtdp_p: 7653 case Intrinsic::ppc_vsx_xvcmpeqsp_p: 7654 case Intrinsic::ppc_vsx_xvcmpgesp_p: 7655 case 
Intrinsic::ppc_vsx_xvcmpgtsp_p: 7656 if (Subtarget.hasVSX()) { 7657 switch (IntrinsicID) { 7658 case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; 7659 case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; 7660 case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; 7661 case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; 7662 case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; 7663 case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; 7664 } 7665 isDot = 1; 7666 } 7667 else 7668 return false; 7669 7670 break; 7671 7672 // Normal Comparisons. 7673 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 7674 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 7675 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 7676 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 7677 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 7678 case Intrinsic::ppc_altivec_vcmpequd: 7679 if (Subtarget.hasP8Altivec()) { 7680 CompareOpc = 199; 7681 isDot = 0; 7682 } else 7683 return false; 7684 7685 break; 7686 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 7687 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 7688 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 7689 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 7690 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 7691 case Intrinsic::ppc_altivec_vcmpgtsd: 7692 if (Subtarget.hasP8Altivec()) { 7693 CompareOpc = 967; 7694 isDot = 0; 7695 } else 7696 return false; 7697 7698 break; 7699 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 7700 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 7701 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 7702 case Intrinsic::ppc_altivec_vcmpgtud: 7703 if (Subtarget.hasP8Altivec()) { 7704 CompareOpc = 711; 7705 isDot = 0; 7706 } else 7707 return false; 7708 7709 break; 7710 } 7711 return true; 7712 } 7713 7714 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 7715 /// lower, do it, otherwise return null. 7716 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 7717 SelectionDAG &DAG) const { 7718 // If this is a lowered altivec predicate compare, CompareOpc is set to the 7719 // opcode number of the comparison. 7720 SDLoc dl(Op); 7721 int CompareOpc; 7722 bool isDot; 7723 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) 7724 return SDValue(); // Don't custom lower most intrinsics. 7725 7726 // If this is a non-dot comparison, make the VCMP node and we are done. 7727 if (!isDot) { 7728 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 7729 Op.getOperand(1), Op.getOperand(2), 7730 DAG.getConstant(CompareOpc, dl, MVT::i32)); 7731 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 7732 } 7733 7734 // Create the PPCISD altivec 'dot' comparison node. 7735 SDValue Ops[] = { 7736 Op.getOperand(2), // LHS 7737 Op.getOperand(3), // RHS 7738 DAG.getConstant(CompareOpc, dl, MVT::i32) 7739 }; 7740 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 7741 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 7742 7743 // Now that we have the comparison, emit a copy from the CR to a GPR. 7744 // This is flagged to the above dot comparison. 
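  // In the MFOCRF result the CR6 field occupies bits 4..7 (counting from the
  // LSB): SO, EQ, GT, LT from bit 4 upward. The shift amount used below,
  // 8 - (3 - BitNo), i.e. 5 + BitNo, therefore moves the EQ bit (BitNo == 0)
  // or the LT bit (BitNo == 2) down to bit 0 before it is masked.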
7745 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 7746 DAG.getRegister(PPC::CR6, MVT::i32), 7747 CompNode.getValue(1)); 7748 7749 // Unpack the result based on how the target uses it. 7750 unsigned BitNo; // Bit # of CR6. 7751 bool InvertBit; // Invert result? 7752 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 7753 default: // Can't happen, don't crash on invalid number though. 7754 case 0: // Return the value of the EQ bit of CR6. 7755 BitNo = 0; InvertBit = false; 7756 break; 7757 case 1: // Return the inverted value of the EQ bit of CR6. 7758 BitNo = 0; InvertBit = true; 7759 break; 7760 case 2: // Return the value of the LT bit of CR6. 7761 BitNo = 2; InvertBit = false; 7762 break; 7763 case 3: // Return the inverted value of the LT bit of CR6. 7764 BitNo = 2; InvertBit = true; 7765 break; 7766 } 7767 7768 // Shift the bit into the low position. 7769 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 7770 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); 7771 // Isolate the bit. 7772 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 7773 DAG.getConstant(1, dl, MVT::i32)); 7774 7775 // If we are supposed to, toggle the bit. 7776 if (InvertBit) 7777 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 7778 DAG.getConstant(1, dl, MVT::i32)); 7779 return Flags; 7780 } 7781 7782 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 7783 SelectionDAG &DAG) const { 7784 SDLoc dl(Op); 7785 // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int 7786 // instructions), but for smaller types, we need to first extend up to v2i32 7787 // before doing going farther. 7788 if (Op.getValueType() == MVT::v2i64) { 7789 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 7790 if (ExtVT != MVT::v2i32) { 7791 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); 7792 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, 7793 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), 7794 ExtVT.getVectorElementType(), 4))); 7795 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); 7796 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, 7797 DAG.getValueType(MVT::v2i32)); 7798 } 7799 7800 return Op; 7801 } 7802 7803 return SDValue(); 7804 } 7805 7806 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 7807 SelectionDAG &DAG) const { 7808 SDLoc dl(Op); 7809 // Create a stack slot that is 16-byte aligned. 7810 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 7811 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 7812 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7813 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7814 7815 // Store the input value into Value#0 of the stack slot. 7816 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 7817 Op.getOperand(0), FIdx, MachinePointerInfo(), 7818 false, false, 0); 7819 // Load it out. 7820 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 7821 false, false, false, 0); 7822 } 7823 7824 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 7825 SelectionDAG &DAG) const { 7826 SDLoc dl(Op); 7827 SDNode *N = Op.getNode(); 7828 7829 assert(N->getOperand(0).getValueType() == MVT::v4i1 && 7830 "Unknown extract_vector_elt type"); 7831 7832 SDValue Value = N->getOperand(0); 7833 7834 // The first part of this is like the store lowering except that we don't 7835 // need to track the chain. 7836 7837 // The values are now known to be -1 (false) or 1 (true). 
To convert this 7838 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 7839 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 7840 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 7841 7842 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 7843 // understand how to form the extending load. 7844 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 7845 7846 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 7847 7848 // Now convert to an integer and store. 7849 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7850 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 7851 Value); 7852 7853 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 7854 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 7855 MachinePointerInfo PtrInfo = 7856 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7857 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7858 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7859 7860 SDValue StoreChain = DAG.getEntryNode(); 7861 SmallVector<SDValue, 2> Ops; 7862 Ops.push_back(StoreChain); 7863 Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32)); 7864 Ops.push_back(Value); 7865 Ops.push_back(FIdx); 7866 7867 SmallVector<EVT, 2> ValueVTs; 7868 ValueVTs.push_back(MVT::Other); // chain 7869 SDVTList VTs = DAG.getVTList(ValueVTs); 7870 7871 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 7872 dl, VTs, Ops, MVT::v4i32, PtrInfo); 7873 7874 // Extract the value requested. 7875 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 7876 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 7877 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 7878 7879 SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, 7880 PtrInfo.getWithOffset(Offset), 7881 false, false, false, 0); 7882 7883 if (!Subtarget.useCRBits()) 7884 return IntVal; 7885 7886 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); 7887 } 7888 7889 /// Lowering for QPX v4i1 loads 7890 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, 7891 SelectionDAG &DAG) const { 7892 SDLoc dl(Op); 7893 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); 7894 SDValue LoadChain = LN->getChain(); 7895 SDValue BasePtr = LN->getBasePtr(); 7896 7897 if (Op.getValueType() == MVT::v4f64 || 7898 Op.getValueType() == MVT::v4f32) { 7899 EVT MemVT = LN->getMemoryVT(); 7900 unsigned Alignment = LN->getAlignment(); 7901 7902 // If this load is properly aligned, then it is legal. 
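    // Otherwise, split it below into four scalar element loads (extending
    // loads when the memory type is narrower than the value type), stitch the
    // chains together with a TokenFactor, and rebuild the vector with
    // BUILD_VECTOR.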
7903 if (Alignment >= MemVT.getStoreSize()) 7904 return Op; 7905 7906 EVT ScalarVT = Op.getValueType().getScalarType(), 7907 ScalarMemVT = MemVT.getScalarType(); 7908 unsigned Stride = ScalarMemVT.getStoreSize(); 7909 7910 SmallVector<SDValue, 8> Vals, LoadChains; 7911 for (unsigned Idx = 0; Idx < 4; ++Idx) { 7912 SDValue Load; 7913 if (ScalarVT != ScalarMemVT) 7914 Load = 7915 DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, 7916 BasePtr, 7917 LN->getPointerInfo().getWithOffset(Idx*Stride), 7918 ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(), 7919 LN->isInvariant(), MinAlign(Alignment, Idx*Stride), 7920 LN->getAAInfo()); 7921 else 7922 Load = 7923 DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, 7924 LN->getPointerInfo().getWithOffset(Idx*Stride), 7925 LN->isVolatile(), LN->isNonTemporal(), 7926 LN->isInvariant(), MinAlign(Alignment, Idx*Stride), 7927 LN->getAAInfo()); 7928 7929 if (Idx == 0 && LN->isIndexed()) { 7930 assert(LN->getAddressingMode() == ISD::PRE_INC && 7931 "Unknown addressing mode on vector load"); 7932 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), 7933 LN->getAddressingMode()); 7934 } 7935 7936 Vals.push_back(Load); 7937 LoadChains.push_back(Load.getValue(1)); 7938 7939 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 7940 DAG.getConstant(Stride, dl, 7941 BasePtr.getValueType())); 7942 } 7943 7944 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 7945 SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, 7946 Op.getValueType(), Vals); 7947 7948 if (LN->isIndexed()) { 7949 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; 7950 return DAG.getMergeValues(RetOps, dl); 7951 } 7952 7953 SDValue RetOps[] = { Value, TF }; 7954 return DAG.getMergeValues(RetOps, dl); 7955 } 7956 7957 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); 7958 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); 7959 7960 // To lower v4i1 from a byte array, we load the byte elements of the 7961 // vector and then reuse the BUILD_VECTOR logic. 7962 7963 SmallVector<SDValue, 4> VectElmts, VectElmtChains; 7964 for (unsigned i = 0; i < 4; ++i) { 7965 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 7966 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 7967 7968 VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD, 7969 dl, MVT::i32, LoadChain, Idx, 7970 LN->getPointerInfo().getWithOffset(i), 7971 MVT::i8 /* memory type */, 7972 LN->isVolatile(), LN->isNonTemporal(), 7973 LN->isInvariant(), 7974 1 /* alignment */, LN->getAAInfo())); 7975 VectElmtChains.push_back(VectElmts[i].getValue(1)); 7976 } 7977 7978 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); 7979 SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts); 7980 7981 SDValue RVals[] = { Value, LoadChain }; 7982 return DAG.getMergeValues(RVals, dl); 7983 } 7984 7985 /// Lowering for QPX v4i1 stores 7986 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, 7987 SelectionDAG &DAG) const { 7988 SDLoc dl(Op); 7989 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode()); 7990 SDValue StoreChain = SN->getChain(); 7991 SDValue BasePtr = SN->getBasePtr(); 7992 SDValue Value = SN->getValue(); 7993 7994 if (Value.getValueType() == MVT::v4f64 || 7995 Value.getValueType() == MVT::v4f32) { 7996 EVT MemVT = SN->getMemoryVT(); 7997 unsigned Alignment = SN->getAlignment(); 7998 7999 // If this store is properly aligned, then it is legal. 
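    // Otherwise, scalarize it below into four element stores (truncating
    // stores when the memory type is narrower than the value type), joined by
    // a TokenFactor.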
8000 if (Alignment >= MemVT.getStoreSize()) 8001 return Op; 8002 8003 EVT ScalarVT = Value.getValueType().getScalarType(), 8004 ScalarMemVT = MemVT.getScalarType(); 8005 unsigned Stride = ScalarMemVT.getStoreSize(); 8006 8007 SmallVector<SDValue, 8> Stores; 8008 for (unsigned Idx = 0; Idx < 4; ++Idx) { 8009 SDValue Ex = DAG.getNode( 8010 ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, 8011 DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); 8012 SDValue Store; 8013 if (ScalarVT != ScalarMemVT) 8014 Store = 8015 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, 8016 SN->getPointerInfo().getWithOffset(Idx*Stride), 8017 ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(), 8018 MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); 8019 else 8020 Store = 8021 DAG.getStore(StoreChain, dl, Ex, BasePtr, 8022 SN->getPointerInfo().getWithOffset(Idx*Stride), 8023 SN->isVolatile(), SN->isNonTemporal(), 8024 MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); 8025 8026 if (Idx == 0 && SN->isIndexed()) { 8027 assert(SN->getAddressingMode() == ISD::PRE_INC && 8028 "Unknown addressing mode on vector store"); 8029 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), 8030 SN->getAddressingMode()); 8031 } 8032 8033 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 8034 DAG.getConstant(Stride, dl, 8035 BasePtr.getValueType())); 8036 Stores.push_back(Store); 8037 } 8038 8039 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8040 8041 if (SN->isIndexed()) { 8042 SDValue RetOps[] = { TF, Stores[0].getValue(1) }; 8043 return DAG.getMergeValues(RetOps, dl); 8044 } 8045 8046 return TF; 8047 } 8048 8049 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); 8050 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); 8051 8052 // The values are now known to be -1 (false) or 1 (true). To convert this 8053 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 8054 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 8055 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 8056 8057 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 8058 // understand how to form the extending load. 8059 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 8060 8061 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 8062 8063 // Now convert to an integer and store. 8064 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 8065 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 8066 Value); 8067 8068 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 8069 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 8070 MachinePointerInfo PtrInfo = 8071 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 8072 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8073 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8074 8075 SmallVector<SDValue, 2> Ops; 8076 Ops.push_back(StoreChain); 8077 Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32)); 8078 Ops.push_back(Value); 8079 Ops.push_back(FIdx); 8080 8081 SmallVector<EVT, 2> ValueVTs; 8082 ValueVTs.push_back(MVT::Other); // chain 8083 SDVTList VTs = DAG.getVTList(ValueVTs); 8084 8085 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 8086 dl, VTs, Ops, MVT::v4i32, PtrInfo); 8087 8088 // Move data into the byte array. 
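  // The qvstfiw above wrote one 32-bit word per element into the 16-byte stack
  // slot; reload each word and truncating-store its low byte to the i-th byte
  // of the destination.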
8089 SmallVector<SDValue, 4> Loads, LoadChains; 8090 for (unsigned i = 0; i < 4; ++i) { 8091 unsigned Offset = 4*i; 8092 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 8093 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 8094 8095 Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx, 8096 PtrInfo.getWithOffset(Offset), 8097 false, false, false, 0)); 8098 LoadChains.push_back(Loads[i].getValue(1)); 8099 } 8100 8101 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8102 8103 SmallVector<SDValue, 4> Stores; 8104 for (unsigned i = 0; i < 4; ++i) { 8105 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 8106 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 8107 8108 Stores.push_back(DAG.getTruncStore( 8109 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), 8110 MVT::i8 /* memory type */, SN->isNonTemporal(), SN->isVolatile(), 8111 1 /* alignment */, SN->getAAInfo())); 8112 } 8113 8114 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8115 8116 return StoreChain; 8117 } 8118 8119 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 8120 SDLoc dl(Op); 8121 if (Op.getValueType() == MVT::v4i32) { 8122 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8123 8124 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 8125 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 8126 8127 SDValue RHSSwap = // = vrlw RHS, 16 8128 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 8129 8130 // Shrinkify inputs to v8i16. 8131 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 8132 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 8133 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 8134 8135 // Low parts multiplied together, generating 32-bit results (we ignore the 8136 // top parts). 8137 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 8138 LHS, RHS, DAG, dl, MVT::v4i32); 8139 8140 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 8141 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 8142 // Shift the high parts up 16 bits. 8143 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 8144 Neg16, DAG, dl); 8145 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 8146 } else if (Op.getValueType() == MVT::v8i16) { 8147 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8148 8149 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 8150 8151 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 8152 LHS, RHS, Zero, DAG, dl); 8153 } else if (Op.getValueType() == MVT::v16i8) { 8154 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8155 bool isLittleEndian = Subtarget.isLittleEndian(); 8156 8157 // Multiply the even 8-bit parts, producing 16-bit sums. 8158 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 8159 LHS, RHS, DAG, dl, MVT::v8i16); 8160 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 8161 8162 // Multiply the odd 8-bit parts, producing 16-bit sums. 8163 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 8164 LHS, RHS, DAG, dl, MVT::v8i16); 8165 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 8166 8167 // Merge the results together. Because vmuleub and vmuloub are 8168 // instructions with a big-endian bias, we must reverse the 8169 // element numbering and reverse the meaning of "odd" and "even" 8170 // when generating little endian code. 
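  // For example, on a big-endian target result byte 0 is Ops[0] == 1, the
  // low-order byte of the first even product, and result byte 1 is
  // Ops[1] == 17, the low-order byte of the first odd product (mask indices of
  // 16 and above select from the second shuffle operand).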
8171 int Ops[16]; 8172 for (unsigned i = 0; i != 8; ++i) { 8173 if (isLittleEndian) { 8174 Ops[i*2 ] = 2*i; 8175 Ops[i*2+1] = 2*i+16; 8176 } else { 8177 Ops[i*2 ] = 2*i+1; 8178 Ops[i*2+1] = 2*i+1+16; 8179 } 8180 } 8181 if (isLittleEndian) 8182 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); 8183 else 8184 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 8185 } else { 8186 llvm_unreachable("Unknown mul to lower!"); 8187 } 8188 } 8189 8190 /// LowerOperation - Provide custom lowering hooks for some operations. 8191 /// 8192 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 8193 switch (Op.getOpcode()) { 8194 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 8195 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 8196 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 8197 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 8198 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 8199 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 8200 case ISD::SETCC: return LowerSETCC(Op, DAG); 8201 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 8202 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 8203 case ISD::VASTART: 8204 return LowerVASTART(Op, DAG, Subtarget); 8205 8206 case ISD::VAARG: 8207 return LowerVAARG(Op, DAG, Subtarget); 8208 8209 case ISD::VACOPY: 8210 return LowerVACOPY(Op, DAG, Subtarget); 8211 8212 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); 8213 case ISD::DYNAMIC_STACKALLOC: 8214 return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); 8215 case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG, Subtarget); 8216 8217 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 8218 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 8219 8220 case ISD::LOAD: return LowerLOAD(Op, DAG); 8221 case ISD::STORE: return LowerSTORE(Op, DAG); 8222 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); 8223 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 8224 case ISD::FP_TO_UINT: 8225 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 8226 SDLoc(Op)); 8227 case ISD::UINT_TO_FP: 8228 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 8229 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 8230 8231 // Lower 64-bit shifts. 8232 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 8233 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 8234 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 8235 8236 // Vector-related lowering. 8237 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 8238 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 8239 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 8240 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 8241 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 8242 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 8243 case ISD::MUL: return LowerMUL(Op, DAG); 8244 8245 // For counter-based loop handling. 8246 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 8247 8248 // Frame & Return address. 
8249 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 8250 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 8251 } 8252 } 8253 8254 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 8255 SmallVectorImpl<SDValue>&Results, 8256 SelectionDAG &DAG) const { 8257 SDLoc dl(N); 8258 switch (N->getOpcode()) { 8259 default: 8260 llvm_unreachable("Do not know how to custom type legalize this operation!"); 8261 case ISD::READCYCLECOUNTER: { 8262 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 8263 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); 8264 8265 Results.push_back(RTB); 8266 Results.push_back(RTB.getValue(1)); 8267 Results.push_back(RTB.getValue(2)); 8268 break; 8269 } 8270 case ISD::INTRINSIC_W_CHAIN: { 8271 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 8272 Intrinsic::ppc_is_decremented_ctr_nonzero) 8273 break; 8274 8275 assert(N->getValueType(0) == MVT::i1 && 8276 "Unexpected result type for CTR decrement intrinsic"); 8277 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), 8278 N->getValueType(0)); 8279 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 8280 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 8281 N->getOperand(1)); 8282 8283 Results.push_back(NewInt); 8284 Results.push_back(NewInt.getValue(1)); 8285 break; 8286 } 8287 case ISD::VAARG: { 8288 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) 8289 return; 8290 8291 EVT VT = N->getValueType(0); 8292 8293 if (VT == MVT::i64) { 8294 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget); 8295 8296 Results.push_back(NewNode); 8297 Results.push_back(NewNode.getValue(1)); 8298 } 8299 return; 8300 } 8301 case ISD::FP_ROUND_INREG: { 8302 assert(N->getValueType(0) == MVT::ppcf128); 8303 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 8304 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8305 MVT::f64, N->getOperand(0), 8306 DAG.getIntPtrConstant(0, dl)); 8307 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8308 MVT::f64, N->getOperand(0), 8309 DAG.getIntPtrConstant(1, dl)); 8310 8311 // Add the two halves of the long double in round-to-zero mode. 8312 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 8313 8314 // We know the low half is about to be thrown away, so just use something 8315 // convenient. 8316 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 8317 FPreg, FPreg)); 8318 return; 8319 } 8320 case ISD::FP_TO_SINT: 8321 case ISD::FP_TO_UINT: 8322 // LowerFP_TO_INT() can only handle f32 and f64. 
8323 if (N->getOperand(0).getValueType() == MVT::ppcf128) 8324 return; 8325 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 8326 return; 8327 } 8328 } 8329 8330 //===----------------------------------------------------------------------===// 8331 // Other Lowering Code 8332 //===----------------------------------------------------------------------===// 8333 8334 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { 8335 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 8336 Function *Func = Intrinsic::getDeclaration(M, Id); 8337 return Builder.CreateCall(Func, {}); 8338 } 8339 8340 // The mappings for emitLeading/TrailingFence is taken from 8341 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html 8342 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 8343 AtomicOrdering Ord, bool IsStore, 8344 bool IsLoad) const { 8345 if (Ord == AtomicOrdering::SequentiallyConsistent) 8346 return callIntrinsic(Builder, Intrinsic::ppc_sync); 8347 if (isReleaseOrStronger(Ord)) 8348 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 8349 return nullptr; 8350 } 8351 8352 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 8353 AtomicOrdering Ord, bool IsStore, 8354 bool IsLoad) const { 8355 if (IsLoad && isAcquireOrStronger(Ord)) 8356 return callIntrinsic(Builder, Intrinsic::ppc_lwsync); 8357 // FIXME: this is too conservative, a dependent branch + isync is enough. 8358 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and 8359 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html 8360 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. 8361 return nullptr; 8362 } 8363 8364 MachineBasicBlock * 8365 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 8366 unsigned AtomicSize, 8367 unsigned BinOpcode) const { 8368 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
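  // The operation is expanded into the usual load-reserve / store-conditional
  // loop below: loopMBB re-executes until the store-conditional succeeds. For
  // ATOMIC_SWAP (BinOpcode == 0) the incoming value is stored back unchanged.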
8369 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8370 8371 auto LoadMnemonic = PPC::LDARX; 8372 auto StoreMnemonic = PPC::STDCX; 8373 switch (AtomicSize) { 8374 default: 8375 llvm_unreachable("Unexpected size of atomic entity"); 8376 case 1: 8377 LoadMnemonic = PPC::LBARX; 8378 StoreMnemonic = PPC::STBCX; 8379 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 8380 break; 8381 case 2: 8382 LoadMnemonic = PPC::LHARX; 8383 StoreMnemonic = PPC::STHCX; 8384 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 8385 break; 8386 case 4: 8387 LoadMnemonic = PPC::LWARX; 8388 StoreMnemonic = PPC::STWCX; 8389 break; 8390 case 8: 8391 LoadMnemonic = PPC::LDARX; 8392 StoreMnemonic = PPC::STDCX; 8393 break; 8394 } 8395 8396 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8397 MachineFunction *F = BB->getParent(); 8398 MachineFunction::iterator It = ++BB->getIterator(); 8399 8400 unsigned dest = MI->getOperand(0).getReg(); 8401 unsigned ptrA = MI->getOperand(1).getReg(); 8402 unsigned ptrB = MI->getOperand(2).getReg(); 8403 unsigned incr = MI->getOperand(3).getReg(); 8404 DebugLoc dl = MI->getDebugLoc(); 8405 8406 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 8407 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8408 F->insert(It, loopMBB); 8409 F->insert(It, exitMBB); 8410 exitMBB->splice(exitMBB->begin(), BB, 8411 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8412 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8413 8414 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8415 unsigned TmpReg = (!BinOpcode) ? incr : 8416 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass 8417 : &PPC::GPRCRegClass); 8418 8419 // thisMBB: 8420 // ... 8421 // fallthrough --> loopMBB 8422 BB->addSuccessor(loopMBB); 8423 8424 // loopMBB: 8425 // l[wd]arx dest, ptr 8426 // add r0, dest, incr 8427 // st[wd]cx. r0, ptr 8428 // bne- loopMBB 8429 // fallthrough --> exitMBB 8430 BB = loopMBB; 8431 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 8432 .addReg(ptrA).addReg(ptrB); 8433 if (BinOpcode) 8434 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 8435 BuildMI(BB, dl, TII->get(StoreMnemonic)) 8436 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 8437 BuildMI(BB, dl, TII->get(PPC::BCC)) 8438 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 8439 BB->addSuccessor(loopMBB); 8440 BB->addSuccessor(exitMBB); 8441 8442 // exitMBB: 8443 // ... 8444 BB = exitMBB; 8445 return BB; 8446 } 8447 8448 MachineBasicBlock * 8449 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 8450 MachineBasicBlock *BB, 8451 bool is8bit, // operation 8452 unsigned BinOpcode) const { 8453 // If we support part-word atomic mnemonics, just use them 8454 if (Subtarget.hasPartwordAtomics()) 8455 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); 8456 8457 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 8458 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8459 // In 64 bit mode we have to use 64 bits for addresses, even though the 8460 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 8461 // registers without caring whether they're 32 or 64, but here we're 8462 // doing actual arithmetic on the addresses. 8463 bool is64bit = Subtarget.isPPC64(); 8464 unsigned ZeroReg = is64bit ? 
PPC::ZERO8 : PPC::ZERO; 8465 8466 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8467 MachineFunction *F = BB->getParent(); 8468 MachineFunction::iterator It = ++BB->getIterator(); 8469 8470 unsigned dest = MI->getOperand(0).getReg(); 8471 unsigned ptrA = MI->getOperand(1).getReg(); 8472 unsigned ptrB = MI->getOperand(2).getReg(); 8473 unsigned incr = MI->getOperand(3).getReg(); 8474 DebugLoc dl = MI->getDebugLoc(); 8475 8476 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 8477 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8478 F->insert(It, loopMBB); 8479 F->insert(It, exitMBB); 8480 exitMBB->splice(exitMBB->begin(), BB, 8481 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8482 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8483 8484 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8485 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 8486 : &PPC::GPRCRegClass; 8487 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 8488 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 8489 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 8490 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 8491 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 8492 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 8493 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 8494 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 8495 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 8496 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 8497 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 8498 unsigned Ptr1Reg; 8499 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 8500 8501 // thisMBB: 8502 // ... 8503 // fallthrough --> loopMBB 8504 BB->addSuccessor(loopMBB); 8505 8506 // The 4-byte load must be aligned, while a char or short may be 8507 // anywhere in the word. Hence all this nasty bookkeeping code. 8508 // add ptr1, ptrA, ptrB [copy if ptrA==0] 8509 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 8510 // xori shift, shift1, 24 [16] 8511 // rlwinm ptr, ptr1, 0, 0, 29 8512 // slw incr2, incr, shift 8513 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 8514 // slw mask, mask2, shift 8515 // loopMBB: 8516 // lwarx tmpDest, ptr 8517 // add tmp, tmpDest, incr2 8518 // andc tmp2, tmpDest, mask 8519 // and tmp3, tmp, mask 8520 // or tmp4, tmp3, tmp2 8521 // stwcx. tmp4, ptr 8522 // bne- loopMBB 8523 // fallthrough --> exitMBB 8524 // srw dest, tmpDest, shift 8525 if (ptrA != ZeroReg) { 8526 Ptr1Reg = RegInfo.createVirtualRegister(RC); 8527 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 8528 .addReg(ptrA).addReg(ptrB); 8529 } else { 8530 Ptr1Reg = ptrB; 8531 } 8532 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 8533 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 8534 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 8535 .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); 8536 if (is64bit) 8537 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 8538 .addReg(Ptr1Reg).addImm(0).addImm(61); 8539 else 8540 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 8541 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 8542 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 8543 .addReg(incr).addReg(ShiftReg); 8544 if (is8bit) 8545 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 8546 else { 8547 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 8548 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 8549 } 8550 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 8551 .addReg(Mask2Reg).addReg(ShiftReg); 8552 8553 BB = loopMBB; 8554 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 8555 .addReg(ZeroReg).addReg(PtrReg); 8556 if (BinOpcode) 8557 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 8558 .addReg(Incr2Reg).addReg(TmpDestReg); 8559 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 8560 .addReg(TmpDestReg).addReg(MaskReg); 8561 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 8562 .addReg(TmpReg).addReg(MaskReg); 8563 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 8564 .addReg(Tmp3Reg).addReg(Tmp2Reg); 8565 BuildMI(BB, dl, TII->get(PPC::STWCX)) 8566 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 8567 BuildMI(BB, dl, TII->get(PPC::BCC)) 8568 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 8569 BB->addSuccessor(loopMBB); 8570 BB->addSuccessor(exitMBB); 8571 8572 // exitMBB: 8573 // ... 8574 BB = exitMBB; 8575 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 8576 .addReg(ShiftReg); 8577 return BB; 8578 } 8579 8580 llvm::MachineBasicBlock* 8581 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, 8582 MachineBasicBlock *MBB) const { 8583 DebugLoc DL = MI->getDebugLoc(); 8584 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8585 8586 MachineFunction *MF = MBB->getParent(); 8587 MachineRegisterInfo &MRI = MF->getRegInfo(); 8588 8589 const BasicBlock *BB = MBB->getBasicBlock(); 8590 MachineFunction::iterator I = ++MBB->getIterator(); 8591 8592 // Memory Reference 8593 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 8594 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 8595 8596 unsigned DstReg = MI->getOperand(0).getReg(); 8597 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 8598 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 8599 unsigned mainDstReg = MRI.createVirtualRegister(RC); 8600 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 8601 8602 MVT PVT = getPointerTy(MF->getDataLayout()); 8603 assert((PVT == MVT::i64 || PVT == MVT::i32) && 8604 "Invalid Pointer Size!"); 8605 // For v = setjmp(buf), we generate 8606 // 8607 // thisMBB: 8608 // SjLjSetup mainMBB 8609 // bl mainMBB 8610 // v_restore = 1 8611 // b sinkMBB 8612 // 8613 // mainMBB: 8614 // buf[LabelOffset] = LR 8615 // v_main = 0 8616 // 8617 // sinkMBB: 8618 // v = phi(main, restore) 8619 // 8620 8621 MachineBasicBlock *thisMBB = MBB; 8622 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 8623 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 8624 MF->insert(I, mainMBB); 8625 MF->insert(I, sinkMBB); 8626 8627 MachineInstrBuilder MIB; 8628 8629 // Transfer the remainder of BB and its successor edges to sinkMBB. 
8630 sinkMBB->splice(sinkMBB->begin(), MBB, 8631 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 8632 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 8633 8634 // Note that the structure of the jmp_buf used here is not compatible 8635 // with that used by libc, and is not designed to be. Specifically, it 8636 // stores only those 'reserved' registers that LLVM does not otherwise 8637 // understand how to spill. Also, by convention, by the time this 8638 // intrinsic is called, Clang has already stored the frame address in the 8639 // first slot of the buffer and stack address in the third. Following the 8640 // X86 target code, we'll store the jump address in the second slot. We also 8641 // need to save the TOC pointer (R2) to handle jumps between shared 8642 // libraries, and that will be stored in the fourth slot. The thread 8643 // identifier (R13) is not affected. 8644 8645 // thisMBB: 8646 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 8647 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 8648 const int64_t BPOffset = 4 * PVT.getStoreSize(); 8649 8650 // Prepare IP either in reg. 8651 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 8652 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 8653 unsigned BufReg = MI->getOperand(1).getReg(); 8654 8655 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { 8656 setUsesTOCBasePtr(*MBB->getParent()); 8657 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 8658 .addReg(PPC::X2) 8659 .addImm(TOCOffset) 8660 .addReg(BufReg); 8661 MIB.setMemRefs(MMOBegin, MMOEnd); 8662 } 8663 8664 // Naked functions never have a base pointer, and so we use r1. For all 8665 // other functions, this decision must be delayed until during PEI. 8666 unsigned BaseReg; 8667 if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) 8668 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; 8669 else 8670 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; 8671 8672 MIB = BuildMI(*thisMBB, MI, DL, 8673 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) 8674 .addReg(BaseReg) 8675 .addImm(BPOffset) 8676 .addReg(BufReg); 8677 MIB.setMemRefs(MMOBegin, MMOEnd); 8678 8679 // Setup 8680 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 8681 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 8682 MIB.addRegMask(TRI->getNoPreservedMask()); 8683 8684 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 8685 8686 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 8687 .addMBB(mainMBB); 8688 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 8689 8690 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); 8691 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); 8692 8693 // mainMBB: 8694 // mainDstReg = 0 8695 MIB = 8696 BuildMI(mainMBB, DL, 8697 TII->get(Subtarget.isPPC64() ? 
PPC::MFLR8 : PPC::MFLR), LabelReg); 8698 8699 // Store IP 8700 if (Subtarget.isPPC64()) { 8701 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 8702 .addReg(LabelReg) 8703 .addImm(LabelOffset) 8704 .addReg(BufReg); 8705 } else { 8706 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 8707 .addReg(LabelReg) 8708 .addImm(LabelOffset) 8709 .addReg(BufReg); 8710 } 8711 8712 MIB.setMemRefs(MMOBegin, MMOEnd); 8713 8714 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 8715 mainMBB->addSuccessor(sinkMBB); 8716 8717 // sinkMBB: 8718 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 8719 TII->get(PPC::PHI), DstReg) 8720 .addReg(mainDstReg).addMBB(mainMBB) 8721 .addReg(restoreDstReg).addMBB(thisMBB); 8722 8723 MI->eraseFromParent(); 8724 return sinkMBB; 8725 } 8726 8727 MachineBasicBlock * 8728 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, 8729 MachineBasicBlock *MBB) const { 8730 DebugLoc DL = MI->getDebugLoc(); 8731 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8732 8733 MachineFunction *MF = MBB->getParent(); 8734 MachineRegisterInfo &MRI = MF->getRegInfo(); 8735 8736 // Memory Reference 8737 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 8738 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 8739 8740 MVT PVT = getPointerTy(MF->getDataLayout()); 8741 assert((PVT == MVT::i64 || PVT == MVT::i32) && 8742 "Invalid Pointer Size!"); 8743 8744 const TargetRegisterClass *RC = 8745 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 8746 unsigned Tmp = MRI.createVirtualRegister(RC); 8747 // Since FP is only updated here but NOT referenced, it's treated as GPR. 8748 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 8749 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 8750 unsigned BP = 8751 (PVT == MVT::i64) 8752 ? PPC::X30 8753 : (Subtarget.isSVR4ABI() && 8754 MF->getTarget().getRelocationModel() == Reloc::PIC_ 8755 ? PPC::R29 8756 : PPC::R30); 8757 8758 MachineInstrBuilder MIB; 8759 8760 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 8761 const int64_t SPOffset = 2 * PVT.getStoreSize(); 8762 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 8763 const int64_t BPOffset = 4 * PVT.getStoreSize(); 8764 8765 unsigned BufReg = MI->getOperand(0).getReg(); 8766 8767 // Reload FP (the jumped-to function may not have had a 8768 // frame pointer, and if so, then its r31 will be restored 8769 // as necessary). 
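  // Buffer layout (see emitEHSjLjSetJmp above): slot 0 holds the frame pointer
  // (stored by the front end), slot 1 (LabelOffset) the jump address, slot 2
  // the stack pointer, slot 3 the TOC pointer and slot 4 the base pointer.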
8770 if (PVT == MVT::i64) { 8771 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 8772 .addImm(0) 8773 .addReg(BufReg); 8774 } else { 8775 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 8776 .addImm(0) 8777 .addReg(BufReg); 8778 } 8779 MIB.setMemRefs(MMOBegin, MMOEnd); 8780 8781 // Reload IP 8782 if (PVT == MVT::i64) { 8783 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 8784 .addImm(LabelOffset) 8785 .addReg(BufReg); 8786 } else { 8787 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 8788 .addImm(LabelOffset) 8789 .addReg(BufReg); 8790 } 8791 MIB.setMemRefs(MMOBegin, MMOEnd); 8792 8793 // Reload SP 8794 if (PVT == MVT::i64) { 8795 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 8796 .addImm(SPOffset) 8797 .addReg(BufReg); 8798 } else { 8799 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 8800 .addImm(SPOffset) 8801 .addReg(BufReg); 8802 } 8803 MIB.setMemRefs(MMOBegin, MMOEnd); 8804 8805 // Reload BP 8806 if (PVT == MVT::i64) { 8807 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 8808 .addImm(BPOffset) 8809 .addReg(BufReg); 8810 } else { 8811 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 8812 .addImm(BPOffset) 8813 .addReg(BufReg); 8814 } 8815 MIB.setMemRefs(MMOBegin, MMOEnd); 8816 8817 // Reload TOC 8818 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 8819 setUsesTOCBasePtr(*MBB->getParent()); 8820 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 8821 .addImm(TOCOffset) 8822 .addReg(BufReg); 8823 8824 MIB.setMemRefs(MMOBegin, MMOEnd); 8825 } 8826 8827 // Jump 8828 BuildMI(*MBB, MI, DL, 8829 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 8830 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 8831 8832 MI->eraseFromParent(); 8833 return MBB; 8834 } 8835 8836 MachineBasicBlock * 8837 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 8838 MachineBasicBlock *BB) const { 8839 if (MI->getOpcode() == TargetOpcode::STACKMAP || 8840 MI->getOpcode() == TargetOpcode::PATCHPOINT) { 8841 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && 8842 MI->getOpcode() == TargetOpcode::PATCHPOINT) { 8843 // Call lowering should have added an r2 operand to indicate a dependence 8844 // on the TOC base pointer value. It can't however, because there is no 8845 // way to mark the dependence as implicit there, and so the stackmap code 8846 // will confuse it with a regular operand. Instead, add the dependence 8847 // here. 8848 setUsesTOCBasePtr(*BB->getParent()); 8849 MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); 8850 } 8851 8852 return emitPatchPoint(MI, BB); 8853 } 8854 8855 if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || 8856 MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { 8857 return emitEHSjLjSetJmp(MI, BB); 8858 } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || 8859 MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { 8860 return emitEHSjLjLongJmp(MI, BB); 8861 } 8862 8863 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8864 8865 // To "insert" these instructions we actually have to insert their 8866 // control-flow patterns. 
8867 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8868 MachineFunction::iterator It = ++BB->getIterator(); 8869 8870 MachineFunction *F = BB->getParent(); 8871 8872 if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || 8873 MI->getOpcode() == PPC::SELECT_CC_I8 || 8874 MI->getOpcode() == PPC::SELECT_I4 || 8875 MI->getOpcode() == PPC::SELECT_I8)) { 8876 SmallVector<MachineOperand, 2> Cond; 8877 if (MI->getOpcode() == PPC::SELECT_CC_I4 || 8878 MI->getOpcode() == PPC::SELECT_CC_I8) 8879 Cond.push_back(MI->getOperand(4)); 8880 else 8881 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 8882 Cond.push_back(MI->getOperand(1)); 8883 8884 DebugLoc dl = MI->getDebugLoc(); 8885 TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), 8886 Cond, MI->getOperand(2).getReg(), 8887 MI->getOperand(3).getReg()); 8888 } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || 8889 MI->getOpcode() == PPC::SELECT_CC_I8 || 8890 MI->getOpcode() == PPC::SELECT_CC_F4 || 8891 MI->getOpcode() == PPC::SELECT_CC_F8 || 8892 MI->getOpcode() == PPC::SELECT_CC_QFRC || 8893 MI->getOpcode() == PPC::SELECT_CC_QSRC || 8894 MI->getOpcode() == PPC::SELECT_CC_QBRC || 8895 MI->getOpcode() == PPC::SELECT_CC_VRRC || 8896 MI->getOpcode() == PPC::SELECT_CC_VSFRC || 8897 MI->getOpcode() == PPC::SELECT_CC_VSSRC || 8898 MI->getOpcode() == PPC::SELECT_CC_VSRC || 8899 MI->getOpcode() == PPC::SELECT_I4 || 8900 MI->getOpcode() == PPC::SELECT_I8 || 8901 MI->getOpcode() == PPC::SELECT_F4 || 8902 MI->getOpcode() == PPC::SELECT_F8 || 8903 MI->getOpcode() == PPC::SELECT_QFRC || 8904 MI->getOpcode() == PPC::SELECT_QSRC || 8905 MI->getOpcode() == PPC::SELECT_QBRC || 8906 MI->getOpcode() == PPC::SELECT_VRRC || 8907 MI->getOpcode() == PPC::SELECT_VSFRC || 8908 MI->getOpcode() == PPC::SELECT_VSSRC || 8909 MI->getOpcode() == PPC::SELECT_VSRC) { 8910 // The incoming instruction knows the destination vreg to set, the 8911 // condition code register to branch on, the true/false values to 8912 // select between, and a branch opcode to use. 8913 8914 // thisMBB: 8915 // ... 8916 // TrueVal = ... 8917 // cmpTY ccX, r1, r2 8918 // bCC copy1MBB 8919 // fallthrough --> copy0MBB 8920 MachineBasicBlock *thisMBB = BB; 8921 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 8922 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 8923 DebugLoc dl = MI->getDebugLoc(); 8924 F->insert(It, copy0MBB); 8925 F->insert(It, sinkMBB); 8926 8927 // Transfer the remainder of BB and its successor edges to sinkMBB. 8928 sinkMBB->splice(sinkMBB->begin(), BB, 8929 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8930 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 8931 8932 // Next, add the true and fallthrough blocks as its successors. 
8933 BB->addSuccessor(copy0MBB); 8934 BB->addSuccessor(sinkMBB); 8935 8936 if (MI->getOpcode() == PPC::SELECT_I4 || 8937 MI->getOpcode() == PPC::SELECT_I8 || 8938 MI->getOpcode() == PPC::SELECT_F4 || 8939 MI->getOpcode() == PPC::SELECT_F8 || 8940 MI->getOpcode() == PPC::SELECT_QFRC || 8941 MI->getOpcode() == PPC::SELECT_QSRC || 8942 MI->getOpcode() == PPC::SELECT_QBRC || 8943 MI->getOpcode() == PPC::SELECT_VRRC || 8944 MI->getOpcode() == PPC::SELECT_VSFRC || 8945 MI->getOpcode() == PPC::SELECT_VSSRC || 8946 MI->getOpcode() == PPC::SELECT_VSRC) { 8947 BuildMI(BB, dl, TII->get(PPC::BC)) 8948 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 8949 } else { 8950 unsigned SelectPred = MI->getOperand(4).getImm(); 8951 BuildMI(BB, dl, TII->get(PPC::BCC)) 8952 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 8953 } 8954 8955 // copy0MBB: 8956 // %FalseValue = ... 8957 // # fallthrough to sinkMBB 8958 BB = copy0MBB; 8959 8960 // Update machine-CFG edges 8961 BB->addSuccessor(sinkMBB); 8962 8963 // sinkMBB: 8964 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 8965 // ... 8966 BB = sinkMBB; 8967 BuildMI(*BB, BB->begin(), dl, 8968 TII->get(PPC::PHI), MI->getOperand(0).getReg()) 8969 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 8970 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 8971 } else if (MI->getOpcode() == PPC::ReadTB) { 8972 // To read the 64-bit time-base register on a 32-bit target, we read the 8973 // two halves. Should the counter have wrapped while it was being read, we 8974 // need to try again. 8975 // ... 8976 // readLoop: 8977 // mfspr Rx,TBU # load from TBU 8978 // mfspr Ry,TB # load from TB 8979 // mfspr Rz,TBU # load from TBU 8980 // cmpw crX,Rx,Rz # check if 'old'='new' 8981 // bne readLoop # branch if they're not equal 8982 // ... 8983 8984 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); 8985 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 8986 DebugLoc dl = MI->getDebugLoc(); 8987 F->insert(It, readMBB); 8988 F->insert(It, sinkMBB); 8989 8990 // Transfer the remainder of BB and its successor edges to sinkMBB. 
8991 sinkMBB->splice(sinkMBB->begin(), BB, 8992 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8993 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 8994 8995 BB->addSuccessor(readMBB); 8996 BB = readMBB; 8997 8998 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8999 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 9000 unsigned LoReg = MI->getOperand(0).getReg(); 9001 unsigned HiReg = MI->getOperand(1).getReg(); 9002 9003 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); 9004 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); 9005 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); 9006 9007 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 9008 9009 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) 9010 .addReg(HiReg).addReg(ReadAgainReg); 9011 BuildMI(BB, dl, TII->get(PPC::BCC)) 9012 .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); 9013 9014 BB->addSuccessor(readMBB); 9015 BB->addSuccessor(sinkMBB); 9016 } 9017 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 9018 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 9019 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 9020 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 9021 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 9022 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); 9023 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 9024 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); 9025 9026 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 9027 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 9028 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 9029 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 9030 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 9031 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); 9032 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 9033 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); 9034 9035 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 9036 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 9037 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 9038 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 9039 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 9040 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); 9041 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 9042 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); 9043 9044 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 9045 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 9046 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 9047 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 9048 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 9049 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); 9050 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 9051 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); 9052 9053 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 9054 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 9055 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 9056 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 9057 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 9058 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); 9059 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 9060 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); 9061 9062 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 9063 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 9064 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 9065 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 9066 else 
if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 9067 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); 9068 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 9069 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); 9070 9071 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 9072 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 9073 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 9074 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 9075 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 9076 BB = EmitAtomicBinary(MI, BB, 4, 0); 9077 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 9078 BB = EmitAtomicBinary(MI, BB, 8, 0); 9079 9080 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 9081 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || 9082 (Subtarget.hasPartwordAtomics() && 9083 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || 9084 (Subtarget.hasPartwordAtomics() && 9085 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { 9086 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 9087 9088 auto LoadMnemonic = PPC::LDARX; 9089 auto StoreMnemonic = PPC::STDCX; 9090 switch(MI->getOpcode()) { 9091 default: 9092 llvm_unreachable("Compare and swap of unknown size"); 9093 case PPC::ATOMIC_CMP_SWAP_I8: 9094 LoadMnemonic = PPC::LBARX; 9095 StoreMnemonic = PPC::STBCX; 9096 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 9097 break; 9098 case PPC::ATOMIC_CMP_SWAP_I16: 9099 LoadMnemonic = PPC::LHARX; 9100 StoreMnemonic = PPC::STHCX; 9101 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 9102 break; 9103 case PPC::ATOMIC_CMP_SWAP_I32: 9104 LoadMnemonic = PPC::LWARX; 9105 StoreMnemonic = PPC::STWCX; 9106 break; 9107 case PPC::ATOMIC_CMP_SWAP_I64: 9108 LoadMnemonic = PPC::LDARX; 9109 StoreMnemonic = PPC::STDCX; 9110 break; 9111 } 9112 unsigned dest = MI->getOperand(0).getReg(); 9113 unsigned ptrA = MI->getOperand(1).getReg(); 9114 unsigned ptrB = MI->getOperand(2).getReg(); 9115 unsigned oldval = MI->getOperand(3).getReg(); 9116 unsigned newval = MI->getOperand(4).getReg(); 9117 DebugLoc dl = MI->getDebugLoc(); 9118 9119 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 9120 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 9121 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 9122 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 9123 F->insert(It, loop1MBB); 9124 F->insert(It, loop2MBB); 9125 F->insert(It, midMBB); 9126 F->insert(It, exitMBB); 9127 exitMBB->splice(exitMBB->begin(), BB, 9128 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9129 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 9130 9131 // thisMBB: 9132 // ... 9133 // fallthrough --> loopMBB 9134 BB->addSuccessor(loop1MBB); 9135 9136 // loop1MBB: 9137 // l[bhwd]arx dest, ptr 9138 // cmp[wd] dest, oldval 9139 // bne- midMBB 9140 // loop2MBB: 9141 // st[bhwd]cx. newval, ptr 9142 // bne- loopMBB 9143 // b exitBB 9144 // midMBB: 9145 // st[bhwd]cx. dest, ptr 9146 // exitBB: 9147 BB = loop1MBB; 9148 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 9149 .addReg(ptrA).addReg(ptrB); 9150 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::CMPD : PPC::CMPW), PPC::CR0) 9151 .addReg(oldval).addReg(dest); 9152 BuildMI(BB, dl, TII->get(PPC::BCC)) 9153 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 9154 BB->addSuccessor(loop2MBB); 9155 BB->addSuccessor(midMBB); 9156 9157 BB = loop2MBB; 9158 BuildMI(BB, dl, TII->get(StoreMnemonic)) 9159 .addReg(newval).addReg(ptrA).addReg(ptrB); 9160 BuildMI(BB, dl, TII->get(PPC::BCC)) 9161 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 9162 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 9163 BB->addSuccessor(loop1MBB); 9164 BB->addSuccessor(exitMBB); 9165 9166 BB = midMBB; 9167 BuildMI(BB, dl, TII->get(StoreMnemonic)) 9168 .addReg(dest).addReg(ptrA).addReg(ptrB); 9169 BB->addSuccessor(exitMBB); 9170 9171 // exitMBB: 9172 // ... 9173 BB = exitMBB; 9174 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 9175 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 9176 // We must use 64-bit registers for addresses when targeting 64-bit, 9177 // since we're actually doing arithmetic on them. Other registers 9178 // can be 32-bit. 9179 bool is64bit = Subtarget.isPPC64(); 9180 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 9181 9182 unsigned dest = MI->getOperand(0).getReg(); 9183 unsigned ptrA = MI->getOperand(1).getReg(); 9184 unsigned ptrB = MI->getOperand(2).getReg(); 9185 unsigned oldval = MI->getOperand(3).getReg(); 9186 unsigned newval = MI->getOperand(4).getReg(); 9187 DebugLoc dl = MI->getDebugLoc(); 9188 9189 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 9190 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 9191 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 9192 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 9193 F->insert(It, loop1MBB); 9194 F->insert(It, loop2MBB); 9195 F->insert(It, midMBB); 9196 F->insert(It, exitMBB); 9197 exitMBB->splice(exitMBB->begin(), BB, 9198 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9199 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 9200 9201 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9202 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 9203 : &PPC::GPRCRegClass; 9204 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 9205 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 9206 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 9207 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 9208 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 9209 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 9210 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 9211 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 9212 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 9213 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 9214 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 9215 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 9216 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 9217 unsigned Ptr1Reg; 9218 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 9219 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 9220 // thisMBB: 9221 // ... 9222 // fallthrough --> loopMBB 9223 BB->addSuccessor(loop1MBB); 9224 9225 // The 4-byte load must be aligned, while a char or short may be 9226 // anywhere in the word. Hence all this nasty bookkeeping code. 
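// The expansion below aligns the address down to a word boundary, computes the
// bit shift of the byte/halfword field within that word, and then compares and
// swaps only the masked field inside an lwarx/stwcx. loop: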
9227 // add ptr1, ptrA, ptrB [copy if ptrA==0] 9228 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 9229 // xori shift, shift1, 24 [16] 9230 // rlwinm ptr, ptr1, 0, 0, 29 9231 // slw newval2, newval, shift 9232 // slw oldval2, oldval,shift 9233 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 9234 // slw mask, mask2, shift 9235 // and newval3, newval2, mask 9236 // and oldval3, oldval2, mask 9237 // loop1MBB: 9238 // lwarx tmpDest, ptr 9239 // and tmp, tmpDest, mask 9240 // cmpw tmp, oldval3 9241 // bne- midMBB 9242 // loop2MBB: 9243 // andc tmp2, tmpDest, mask 9244 // or tmp4, tmp2, newval3 9245 // stwcx. tmp4, ptr 9246 // bne- loop1MBB 9247 // b exitBB 9248 // midMBB: 9249 // stwcx. tmpDest, ptr 9250 // exitBB: 9251 // srw dest, tmpDest, shift 9252 if (ptrA != ZeroReg) { 9253 Ptr1Reg = RegInfo.createVirtualRegister(RC); 9254 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 9255 .addReg(ptrA).addReg(ptrB); 9256 } else { 9257 Ptr1Reg = ptrB; 9258 } 9259 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 9260 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 9261 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 9262 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 9263 if (is64bit) 9264 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 9265 .addReg(Ptr1Reg).addImm(0).addImm(61); 9266 else 9267 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 9268 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 9269 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 9270 .addReg(newval).addReg(ShiftReg); 9271 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 9272 .addReg(oldval).addReg(ShiftReg); 9273 if (is8bit) 9274 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 9275 else { 9276 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 9277 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 9278 .addReg(Mask3Reg).addImm(65535); 9279 } 9280 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 9281 .addReg(Mask2Reg).addReg(ShiftReg); 9282 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 9283 .addReg(NewVal2Reg).addReg(MaskReg); 9284 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 9285 .addReg(OldVal2Reg).addReg(MaskReg); 9286 9287 BB = loop1MBB; 9288 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 9289 .addReg(ZeroReg).addReg(PtrReg); 9290 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 9291 .addReg(TmpDestReg).addReg(MaskReg); 9292 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 9293 .addReg(TmpReg).addReg(OldVal3Reg); 9294 BuildMI(BB, dl, TII->get(PPC::BCC)) 9295 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 9296 BB->addSuccessor(loop2MBB); 9297 BB->addSuccessor(midMBB); 9298 9299 BB = loop2MBB; 9300 BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) 9301 .addReg(TmpDestReg).addReg(MaskReg); 9302 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 9303 .addReg(Tmp2Reg).addReg(NewVal3Reg); 9304 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 9305 .addReg(ZeroReg).addReg(PtrReg); 9306 BuildMI(BB, dl, TII->get(PPC::BCC)) 9307 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 9308 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 9309 BB->addSuccessor(loop1MBB); 9310 BB->addSuccessor(exitMBB); 9311 9312 BB = midMBB; 9313 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 9314 .addReg(ZeroReg).addReg(PtrReg); 9315 BB->addSuccessor(exitMBB); 9316 9317 // exitMBB: 9318 // ... 
9319 BB = exitMBB; 9320 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 9321 .addReg(ShiftReg); 9322 } else if (MI->getOpcode() == PPC::FADDrtz) { 9323 // This pseudo performs an FADD with rounding mode temporarily forced 9324 // to round-to-zero. We emit this via custom inserter since the FPSCR 9325 // is not modeled at the SelectionDAG level. 9326 unsigned Dest = MI->getOperand(0).getReg(); 9327 unsigned Src1 = MI->getOperand(1).getReg(); 9328 unsigned Src2 = MI->getOperand(2).getReg(); 9329 DebugLoc dl = MI->getDebugLoc(); 9330 9331 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9332 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 9333 9334 // Save FPSCR value. 9335 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 9336 9337 // Set rounding mode to round-to-zero. 9338 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 9339 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 9340 9341 // Perform addition. 9342 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 9343 9344 // Restore FPSCR value. 9345 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); 9346 } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 9347 MI->getOpcode() == PPC::ANDIo_1_GT_BIT || 9348 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 9349 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { 9350 unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 9351 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? 9352 PPC::ANDIo8 : PPC::ANDIo; 9353 bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 9354 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); 9355 9356 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9357 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 9358 &PPC::GPRCRegClass : 9359 &PPC::G8RCRegClass); 9360 9361 DebugLoc dl = MI->getDebugLoc(); 9362 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 9363 .addReg(MI->getOperand(1).getReg()).addImm(1); 9364 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 9365 MI->getOperand(0).getReg()) 9366 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 9367 } else if (MI->getOpcode() == PPC::TCHECK_RET) { 9368 DebugLoc Dl = MI->getDebugLoc(); 9369 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9370 unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 9371 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); 9372 return BB; 9373 } else { 9374 llvm_unreachable("Unexpected instr type to insert"); 9375 } 9376 9377 MI->eraseFromParent(); // The pseudo instruction is gone now. 
9378 return BB; 9379 } 9380 9381 //===----------------------------------------------------------------------===// 9382 // Target Optimization Hooks 9383 //===----------------------------------------------------------------------===// 9384 9385 static std::string getRecipOp(const char *Base, EVT VT) { 9386 std::string RecipOp(Base); 9387 if (VT.getScalarType() == MVT::f64) 9388 RecipOp += "d"; 9389 else 9390 RecipOp += "f"; 9391 9392 if (VT.isVector()) 9393 RecipOp = "vec-" + RecipOp; 9394 9395 return RecipOp; 9396 } 9397 9398 SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, 9399 DAGCombinerInfo &DCI, 9400 unsigned &RefinementSteps, 9401 bool &UseOneConstNR) const { 9402 EVT VT = Operand.getValueType(); 9403 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 9404 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 9405 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 9406 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 9407 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 9408 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 9409 TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; 9410 std::string RecipOp = getRecipOp("sqrt", VT); 9411 if (!Recips.isEnabled(RecipOp)) 9412 return SDValue(); 9413 9414 RefinementSteps = Recips.getRefinementSteps(RecipOp); 9415 UseOneConstNR = true; 9416 return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); 9417 } 9418 return SDValue(); 9419 } 9420 9421 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, 9422 DAGCombinerInfo &DCI, 9423 unsigned &RefinementSteps) const { 9424 EVT VT = Operand.getValueType(); 9425 if ((VT == MVT::f32 && Subtarget.hasFRES()) || 9426 (VT == MVT::f64 && Subtarget.hasFRE()) || 9427 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 9428 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 9429 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 9430 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 9431 TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; 9432 std::string RecipOp = getRecipOp("div", VT); 9433 if (!Recips.isEnabled(RecipOp)) 9434 return SDValue(); 9435 9436 RefinementSteps = Recips.getRefinementSteps(RecipOp); 9437 return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); 9438 } 9439 return SDValue(); 9440 } 9441 9442 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { 9443 // Note: This functionality is used only when unsafe-fp-math is enabled, and 9444 // on cores with reciprocal estimates (which are used when unsafe-fp-math is 9445 // enabled for division), this functionality is redundant with the default 9446 // combiner logic (once the division -> reciprocal/multiply transformation 9447 // has taken place). As a result, this matters more for older cores than for 9448 // newer ones. 9449 9450 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 9451 // reciprocal if there are two or more FDIVs (for embedded cores with only 9452 // one FP pipeline) or three or more FDIVs (for generic OOO cores). 9453 switch (Subtarget.getDarwinDirective()) { 9454 default: 9455 return 3; 9456 case PPC::DIR_440: 9457 case PPC::DIR_A2: 9458 case PPC::DIR_E500mc: 9459 case PPC::DIR_E5500: 9460 return 2; 9461 } 9462 } 9463 9464 // isConsecutiveLSLoc needs to work even if all adds have not yet been 9465 // collapsed, and so we need to look through chains of them.
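// getBaseWithConstantOffset peels nested (base + constant) additions,
// accumulating the total constant displacement into Offset and leaving the
// innermost base pointer in Base.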
9466 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, 9467 int64_t& Offset, SelectionDAG &DAG) { 9468 if (DAG.isBaseWithConstantOffset(Loc)) { 9469 Base = Loc.getOperand(0); 9470 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); 9471 9472 // The base might itself be a base plus an offset, and if so, accumulate 9473 // that as well. 9474 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); 9475 } 9476 } 9477 9478 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 9479 unsigned Bytes, int Dist, 9480 SelectionDAG &DAG) { 9481 if (VT.getSizeInBits() / 8 != Bytes) 9482 return false; 9483 9484 SDValue BaseLoc = Base->getBasePtr(); 9485 if (Loc.getOpcode() == ISD::FrameIndex) { 9486 if (BaseLoc.getOpcode() != ISD::FrameIndex) 9487 return false; 9488 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 9489 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 9490 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 9491 int FS = MFI->getObjectSize(FI); 9492 int BFS = MFI->getObjectSize(BFI); 9493 if (FS != BFS || FS != (int)Bytes) return false; 9494 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 9495 } 9496 9497 SDValue Base1 = Loc, Base2 = BaseLoc; 9498 int64_t Offset1 = 0, Offset2 = 0; 9499 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); 9500 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); 9501 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) 9502 return true; 9503 9504 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9505 const GlobalValue *GV1 = nullptr; 9506 const GlobalValue *GV2 = nullptr; 9507 Offset1 = 0; 9508 Offset2 = 0; 9509 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 9510 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 9511 if (isGA1 && isGA2 && GV1 == GV2) 9512 return Offset1 == (Offset2 + Dist*Bytes); 9513 return false; 9514 } 9515 9516 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 9517 // not enforce equality of the chain operands. 
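// Loads and stores expressed as target intrinsics are matched by intrinsic ID
// below, so that the in-memory value type and the address operand can be
// recovered before delegating to isConsecutiveLSLoc.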
9518 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, 9519 unsigned Bytes, int Dist, 9520 SelectionDAG &DAG) { 9521 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { 9522 EVT VT = LS->getMemoryVT(); 9523 SDValue Loc = LS->getBasePtr(); 9524 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); 9525 } 9526 9527 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 9528 EVT VT; 9529 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9530 default: return false; 9531 case Intrinsic::ppc_qpx_qvlfd: 9532 case Intrinsic::ppc_qpx_qvlfda: 9533 VT = MVT::v4f64; 9534 break; 9535 case Intrinsic::ppc_qpx_qvlfs: 9536 case Intrinsic::ppc_qpx_qvlfsa: 9537 VT = MVT::v4f32; 9538 break; 9539 case Intrinsic::ppc_qpx_qvlfcd: 9540 case Intrinsic::ppc_qpx_qvlfcda: 9541 VT = MVT::v2f64; 9542 break; 9543 case Intrinsic::ppc_qpx_qvlfcs: 9544 case Intrinsic::ppc_qpx_qvlfcsa: 9545 VT = MVT::v2f32; 9546 break; 9547 case Intrinsic::ppc_qpx_qvlfiwa: 9548 case Intrinsic::ppc_qpx_qvlfiwz: 9549 case Intrinsic::ppc_altivec_lvx: 9550 case Intrinsic::ppc_altivec_lvxl: 9551 case Intrinsic::ppc_vsx_lxvw4x: 9552 VT = MVT::v4i32; 9553 break; 9554 case Intrinsic::ppc_vsx_lxvd2x: 9555 VT = MVT::v2f64; 9556 break; 9557 case Intrinsic::ppc_altivec_lvebx: 9558 VT = MVT::i8; 9559 break; 9560 case Intrinsic::ppc_altivec_lvehx: 9561 VT = MVT::i16; 9562 break; 9563 case Intrinsic::ppc_altivec_lvewx: 9564 VT = MVT::i32; 9565 break; 9566 } 9567 9568 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); 9569 } 9570 9571 if (N->getOpcode() == ISD::INTRINSIC_VOID) { 9572 EVT VT; 9573 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9574 default: return false; 9575 case Intrinsic::ppc_qpx_qvstfd: 9576 case Intrinsic::ppc_qpx_qvstfda: 9577 VT = MVT::v4f64; 9578 break; 9579 case Intrinsic::ppc_qpx_qvstfs: 9580 case Intrinsic::ppc_qpx_qvstfsa: 9581 VT = MVT::v4f32; 9582 break; 9583 case Intrinsic::ppc_qpx_qvstfcd: 9584 case Intrinsic::ppc_qpx_qvstfcda: 9585 VT = MVT::v2f64; 9586 break; 9587 case Intrinsic::ppc_qpx_qvstfcs: 9588 case Intrinsic::ppc_qpx_qvstfcsa: 9589 VT = MVT::v2f32; 9590 break; 9591 case Intrinsic::ppc_qpx_qvstfiw: 9592 case Intrinsic::ppc_qpx_qvstfiwa: 9593 case Intrinsic::ppc_altivec_stvx: 9594 case Intrinsic::ppc_altivec_stvxl: 9595 case Intrinsic::ppc_vsx_stxvw4x: 9596 VT = MVT::v4i32; 9597 break; 9598 case Intrinsic::ppc_vsx_stxvd2x: 9599 VT = MVT::v2f64; 9600 break; 9601 case Intrinsic::ppc_altivec_stvebx: 9602 VT = MVT::i8; 9603 break; 9604 case Intrinsic::ppc_altivec_stvehx: 9605 VT = MVT::i16; 9606 break; 9607 case Intrinsic::ppc_altivec_stvewx: 9608 VT = MVT::i32; 9609 break; 9610 } 9611 9612 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); 9613 } 9614 9615 return false; 9616 } 9617 9618 // Return true if there is a nearby consecutive load to the one provided 9619 // (regardless of alignment). We search up and down the chain, looking through 9620 // token factors and other loads (but nothing else). As a result, a true result 9621 // indicates that it is safe to create a new consecutive load adjacent to the 9622 // load provided. 9623 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 9624 SDValue Chain = LD->getChain(); 9625 EVT VT = LD->getMemoryVT(); 9626 9627 SmallSet<SDNode *, 16> LoadRoots; 9628 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 9629 SmallSet<SDNode *, 16> Visited; 9630 9631 // First, search up the chain, branching to follow all token-factor operands.
9632 // If we find a consecutive load, then we're done, otherwise, record all 9633 // nodes just above the top-level loads and token factors. 9634 while (!Queue.empty()) { 9635 SDNode *ChainNext = Queue.pop_back_val(); 9636 if (!Visited.insert(ChainNext).second) 9637 continue; 9638 9639 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { 9640 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 9641 return true; 9642 9643 if (!Visited.count(ChainLD->getChain().getNode())) 9644 Queue.push_back(ChainLD->getChain().getNode()); 9645 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 9646 for (const SDUse &O : ChainNext->ops()) 9647 if (!Visited.count(O.getNode())) 9648 Queue.push_back(O.getNode()); 9649 } else 9650 LoadRoots.insert(ChainNext); 9651 } 9652 9653 // Second, search down the chain, starting from the top-level nodes recorded 9654 // in the first phase. These top-level nodes are the nodes just above all 9655 // loads and token factors. Starting with their uses, recursively look through 9656 // all loads (just the chain uses) and token factors to find a consecutive 9657 // load. 9658 Visited.clear(); 9659 Queue.clear(); 9660 9661 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 9662 IE = LoadRoots.end(); I != IE; ++I) { 9663 Queue.push_back(*I); 9664 9665 while (!Queue.empty()) { 9666 SDNode *LoadRoot = Queue.pop_back_val(); 9667 if (!Visited.insert(LoadRoot).second) 9668 continue; 9669 9670 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) 9671 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 9672 return true; 9673 9674 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 9675 UE = LoadRoot->use_end(); UI != UE; ++UI) 9676 if (((isa<MemSDNode>(*UI) && 9677 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || 9678 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 9679 Queue.push_back(*UI); 9680 } 9681 } 9682 9683 return false; 9684 } 9685 9686 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 9687 DAGCombinerInfo &DCI) const { 9688 SelectionDAG &DAG = DCI.DAG; 9689 SDLoc dl(N); 9690 9691 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); 9692 // If we're tracking CR bits, we need to be careful that we don't have: 9693 // trunc(binary-ops(zext(x), zext(y))) 9694 // or 9695 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 9696 // such that we're unnecessarily moving things into GPRs when it would be 9697 // better to keep them in CR bits. 9698 9699 // Note that trunc here can be an actual i1 trunc, or can be the effective 9700 // truncation that comes from a setcc or select_cc. 9701 if (N->getOpcode() == ISD::TRUNCATE && 9702 N->getValueType(0) != MVT::i1) 9703 return SDValue(); 9704 9705 if (N->getOperand(0).getValueType() != MVT::i32 && 9706 N->getOperand(0).getValueType() != MVT::i64) 9707 return SDValue(); 9708 9709 if (N->getOpcode() == ISD::SETCC || 9710 N->getOpcode() == ISD::SELECT_CC) { 9711 // If we're looking at a comparison, then we need to make sure that the 9712 // high bits (all except for the first) don't affect the result. 9713 ISD::CondCode CC = 9714 cast<CondCodeSDNode>(N->getOperand( 9715 N->getOpcode() == ISD::SETCC ?
2 : 4))->get(); 9716 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 9717 9718 if (ISD::isSignedIntSetCC(CC)) { 9719 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 9720 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 9721 return SDValue(); 9722 } else if (ISD::isUnsignedIntSetCC(CC)) { 9723 if (!DAG.MaskedValueIsZero(N->getOperand(0), 9724 APInt::getHighBitsSet(OpBits, OpBits-1)) || 9725 !DAG.MaskedValueIsZero(N->getOperand(1), 9726 APInt::getHighBitsSet(OpBits, OpBits-1))) 9727 return SDValue(); 9728 } else { 9729 // This is neither a signed nor an unsigned comparison, just make sure 9730 // that the high bits are equal. 9731 APInt Op1Zero, Op1One; 9732 APInt Op2Zero, Op2One; 9733 DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); 9734 DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); 9735 9736 // We don't really care about what is known about the first bit (if 9737 // anything), so clear it in all masks prior to comparing them. 9738 Op1Zero.clearBit(0); Op1One.clearBit(0); 9739 Op2Zero.clearBit(0); Op2One.clearBit(0); 9740 9741 if (Op1Zero != Op2Zero || Op1One != Op2One) 9742 return SDValue(); 9743 } 9744 } 9745 9746 // We now know that the higher-order bits are irrelevant, we just need to 9747 // make sure that all of the intermediate operations are bit operations, and 9748 // all inputs are extensions. 9749 if (N->getOperand(0).getOpcode() != ISD::AND && 9750 N->getOperand(0).getOpcode() != ISD::OR && 9751 N->getOperand(0).getOpcode() != ISD::XOR && 9752 N->getOperand(0).getOpcode() != ISD::SELECT && 9753 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 9754 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 9755 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 9756 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 9757 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 9758 return SDValue(); 9759 9760 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 9761 N->getOperand(1).getOpcode() != ISD::AND && 9762 N->getOperand(1).getOpcode() != ISD::OR && 9763 N->getOperand(1).getOpcode() != ISD::XOR && 9764 N->getOperand(1).getOpcode() != ISD::SELECT && 9765 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 9766 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 9767 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 9768 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 9769 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 9770 return SDValue(); 9771 9772 SmallVector<SDValue, 4> Inputs; 9773 SmallVector<SDValue, 8> BinOps, PromOps; 9774 SmallPtrSet<SDNode *, 16> Visited; 9775 9776 for (unsigned i = 0; i < 2; ++i) { 9777 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 9778 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 9779 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 9780 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 9781 isa<ConstantSDNode>(N->getOperand(i))) 9782 Inputs.push_back(N->getOperand(i)); 9783 else 9784 BinOps.push_back(N->getOperand(i)); 9785 9786 if (N->getOpcode() == ISD::TRUNCATE) 9787 break; 9788 } 9789 9790 // Visit all inputs, collect all binary operations (and, or, xor and 9791 // select) that are all fed by extensions. 9792 while (!BinOps.empty()) { 9793 SDValue BinOp = BinOps.back(); 9794 BinOps.pop_back(); 9795 9796 if (!Visited.insert(BinOp.getNode()).second) 9797 continue; 9798 9799 PromOps.push_back(BinOp); 9800 9801 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 9802 // The condition of the select is not promoted. 
9803 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 9804 continue; 9805 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 9806 continue; 9807 9808 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 9809 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 9810 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 9811 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 9812 isa<ConstantSDNode>(BinOp.getOperand(i))) { 9813 Inputs.push_back(BinOp.getOperand(i)); 9814 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 9815 BinOp.getOperand(i).getOpcode() == ISD::OR || 9816 BinOp.getOperand(i).getOpcode() == ISD::XOR || 9817 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 9818 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 9819 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 9820 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 9821 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 9822 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 9823 BinOps.push_back(BinOp.getOperand(i)); 9824 } else { 9825 // We have an input that is not an extension or another binary 9826 // operation; we'll abort this transformation. 9827 return SDValue(); 9828 } 9829 } 9830 } 9831 9832 // Make sure that this is a self-contained cluster of operations (which 9833 // is not quite the same thing as saying that everything has only one 9834 // use). 9835 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 9836 if (isa<ConstantSDNode>(Inputs[i])) 9837 continue; 9838 9839 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 9840 UE = Inputs[i].getNode()->use_end(); 9841 UI != UE; ++UI) { 9842 SDNode *User = *UI; 9843 if (User != N && !Visited.count(User)) 9844 return SDValue(); 9845 9846 // Make sure that we're not going to promote the non-output-value 9847 // operand(s) or SELECT or SELECT_CC. 9848 // FIXME: Although we could sometimes handle this, and it does occur in 9849 // practice that one of the condition inputs to the select is also one of 9850 // the outputs, we currently can't deal with this. 9851 if (User->getOpcode() == ISD::SELECT) { 9852 if (User->getOperand(0) == Inputs[i]) 9853 return SDValue(); 9854 } else if (User->getOpcode() == ISD::SELECT_CC) { 9855 if (User->getOperand(0) == Inputs[i] || 9856 User->getOperand(1) == Inputs[i]) 9857 return SDValue(); 9858 } 9859 } 9860 } 9861 9862 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 9863 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 9864 UE = PromOps[i].getNode()->use_end(); 9865 UI != UE; ++UI) { 9866 SDNode *User = *UI; 9867 if (User != N && !Visited.count(User)) 9868 return SDValue(); 9869 9870 // Make sure that we're not going to promote the non-output-value 9871 // operand(s) or SELECT or SELECT_CC. 9872 // FIXME: Although we could sometimes handle this, and it does occur in 9873 // practice that one of the condition inputs to the select is also one of 9874 // the outputs, we currently can't deal with this. 9875 if (User->getOpcode() == ISD::SELECT) { 9876 if (User->getOperand(0) == PromOps[i]) 9877 return SDValue(); 9878 } else if (User->getOpcode() == ISD::SELECT_CC) { 9879 if (User->getOperand(0) == PromOps[i] || 9880 User->getOperand(1) == PromOps[i]) 9881 return SDValue(); 9882 } 9883 } 9884 } 9885 9886 // Replace all inputs with the extension operand. 
9887 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 9888 // Constants may have users outside the cluster of to-be-promoted nodes, 9889 // and so we need to replace those as we do the promotions. 9890 if (isa<ConstantSDNode>(Inputs[i])) 9891 continue; 9892 else 9893 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 9894 } 9895 9896 // Replace all operations (these are all the same, but have a different 9897 // (i1) return type). DAG.getNode will validate that the types of 9898 // a binary operator match, so go through the list in reverse so that 9899 // we've likely promoted both operands first. Any intermediate truncations or 9900 // extensions disappear. 9901 while (!PromOps.empty()) { 9902 SDValue PromOp = PromOps.back(); 9903 PromOps.pop_back(); 9904 9905 if (PromOp.getOpcode() == ISD::TRUNCATE || 9906 PromOp.getOpcode() == ISD::SIGN_EXTEND || 9907 PromOp.getOpcode() == ISD::ZERO_EXTEND || 9908 PromOp.getOpcode() == ISD::ANY_EXTEND) { 9909 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 9910 PromOp.getOperand(0).getValueType() != MVT::i1) { 9911 // The operand is not yet ready (see comment below). 9912 PromOps.insert(PromOps.begin(), PromOp); 9913 continue; 9914 } 9915 9916 SDValue RepValue = PromOp.getOperand(0); 9917 if (isa<ConstantSDNode>(RepValue)) 9918 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 9919 9920 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 9921 continue; 9922 } 9923 9924 unsigned C; 9925 switch (PromOp.getOpcode()) { 9926 default: C = 0; break; 9927 case ISD::SELECT: C = 1; break; 9928 case ISD::SELECT_CC: C = 2; break; 9929 } 9930 9931 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 9932 PromOp.getOperand(C).getValueType() != MVT::i1) || 9933 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 9934 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 9935 // The to-be-promoted operands of this node have not yet been 9936 // promoted (this should be rare because we're going through the 9937 // list backward, but if one of the operands has several users in 9938 // this cluster of to-be-promoted nodes, it is possible). 9939 PromOps.insert(PromOps.begin(), PromOp); 9940 continue; 9941 } 9942 9943 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 9944 PromOp.getNode()->op_end()); 9945 9946 // If there are any constant inputs, make sure they're replaced now. 9947 for (unsigned i = 0; i < 2; ++i) 9948 if (isa<ConstantSDNode>(Ops[C+i])) 9949 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 9950 9951 DAG.ReplaceAllUsesOfValueWith(PromOp, 9952 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 9953 } 9954 9955 // Now we're left with the initial truncation itself. 9956 if (N->getOpcode() == ISD::TRUNCATE) 9957 return N->getOperand(0); 9958 9959 // Otherwise, this is a comparison. The operands to be compared have just 9960 // changed type (to i1), but everything else is the same. 9961 return SDValue(N, 0); 9962 } 9963 9964 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 9965 DAGCombinerInfo &DCI) const { 9966 SelectionDAG &DAG = DCI.DAG; 9967 SDLoc dl(N); 9968 9969 // If we're tracking CR bits, we need to be careful that we don't have: 9970 // zext(binary-ops(trunc(x), trunc(y))) 9971 // or 9972 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 9973 // such that we're unnecessarily moving things into CR bits that can more 9974 // efficiently stay in GPRs. 
Note that if we're not certain that the high 9975 // bits are set as required by the final extension, we still may need to do 9976 // some masking to get the proper behavior. 9977 9978 // This same functionality is important on PPC64 when dealing with 9979 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 9980 // the return values of functions. Because it is so similar, it is handled 9981 // here as well. 9982 9983 if (N->getValueType(0) != MVT::i32 && 9984 N->getValueType(0) != MVT::i64) 9985 return SDValue(); 9986 9987 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || 9988 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) 9989 return SDValue(); 9990 9991 if (N->getOperand(0).getOpcode() != ISD::AND && 9992 N->getOperand(0).getOpcode() != ISD::OR && 9993 N->getOperand(0).getOpcode() != ISD::XOR && 9994 N->getOperand(0).getOpcode() != ISD::SELECT && 9995 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 9996 return SDValue(); 9997 9998 SmallVector<SDValue, 4> Inputs; 9999 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 10000 SmallPtrSet<SDNode *, 16> Visited; 10001 10002 // Visit all inputs, collect all binary operations (and, or, xor and 10003 // select) that are all fed by truncations. 10004 while (!BinOps.empty()) { 10005 SDValue BinOp = BinOps.back(); 10006 BinOps.pop_back(); 10007 10008 if (!Visited.insert(BinOp.getNode()).second) 10009 continue; 10010 10011 PromOps.push_back(BinOp); 10012 10013 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 10014 // The condition of the select is not promoted. 10015 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 10016 continue; 10017 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 10018 continue; 10019 10020 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 10021 isa<ConstantSDNode>(BinOp.getOperand(i))) { 10022 Inputs.push_back(BinOp.getOperand(i)); 10023 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 10024 BinOp.getOperand(i).getOpcode() == ISD::OR || 10025 BinOp.getOperand(i).getOpcode() == ISD::XOR || 10026 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 10027 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 10028 BinOps.push_back(BinOp.getOperand(i)); 10029 } else { 10030 // We have an input that is not a truncation or another binary 10031 // operation; we'll abort this transformation. 10032 return SDValue(); 10033 } 10034 } 10035 } 10036 10037 // The operands of a select that must be truncated when the select is 10038 // promoted because the operand is actually part of the to-be-promoted set. 10039 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 10040 10041 // Make sure that this is a self-contained cluster of operations (which 10042 // is not quite the same thing as saying that everything has only one 10043 // use). 10044 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10045 if (isa<ConstantSDNode>(Inputs[i])) 10046 continue; 10047 10048 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 10049 UE = Inputs[i].getNode()->use_end(); 10050 UI != UE; ++UI) { 10051 SDNode *User = *UI; 10052 if (User != N && !Visited.count(User)) 10053 return SDValue(); 10054 10055 // If we're going to promote the non-output-value operand(s) or SELECT or 10056 // SELECT_CC, record them for truncation. 
10057 if (User->getOpcode() == ISD::SELECT) { 10058 if (User->getOperand(0) == Inputs[i]) 10059 SelectTruncOp[0].insert(std::make_pair(User, 10060 User->getOperand(0).getValueType())); 10061 } else if (User->getOpcode() == ISD::SELECT_CC) { 10062 if (User->getOperand(0) == Inputs[i]) 10063 SelectTruncOp[0].insert(std::make_pair(User, 10064 User->getOperand(0).getValueType())); 10065 if (User->getOperand(1) == Inputs[i]) 10066 SelectTruncOp[1].insert(std::make_pair(User, 10067 User->getOperand(1).getValueType())); 10068 } 10069 } 10070 } 10071 10072 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 10073 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 10074 UE = PromOps[i].getNode()->use_end(); 10075 UI != UE; ++UI) { 10076 SDNode *User = *UI; 10077 if (User != N && !Visited.count(User)) 10078 return SDValue(); 10079 10080 // If we're going to promote the non-output-value operand(s) or SELECT or 10081 // SELECT_CC, record them for truncation. 10082 if (User->getOpcode() == ISD::SELECT) { 10083 if (User->getOperand(0) == PromOps[i]) 10084 SelectTruncOp[0].insert(std::make_pair(User, 10085 User->getOperand(0).getValueType())); 10086 } else if (User->getOpcode() == ISD::SELECT_CC) { 10087 if (User->getOperand(0) == PromOps[i]) 10088 SelectTruncOp[0].insert(std::make_pair(User, 10089 User->getOperand(0).getValueType())); 10090 if (User->getOperand(1) == PromOps[i]) 10091 SelectTruncOp[1].insert(std::make_pair(User, 10092 User->getOperand(1).getValueType())); 10093 } 10094 } 10095 } 10096 10097 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 10098 bool ReallyNeedsExt = false; 10099 if (N->getOpcode() != ISD::ANY_EXTEND) { 10100 // If all of the inputs are not already sign/zero extended, then 10101 // we'll still need to do that at the end. 10102 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10103 if (isa<ConstantSDNode>(Inputs[i])) 10104 continue; 10105 10106 unsigned OpBits = 10107 Inputs[i].getOperand(0).getValueSizeInBits(); 10108 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 10109 10110 if ((N->getOpcode() == ISD::ZERO_EXTEND && 10111 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 10112 APInt::getHighBitsSet(OpBits, 10113 OpBits-PromBits))) || 10114 (N->getOpcode() == ISD::SIGN_EXTEND && 10115 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 10116 (OpBits-(PromBits-1)))) { 10117 ReallyNeedsExt = true; 10118 break; 10119 } 10120 } 10121 } 10122 10123 // Replace all inputs, either with the truncation operand, or a 10124 // truncation or extension to the final output type. 10125 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10126 // Constant inputs need to be replaced with the to-be-promoted nodes that 10127 // use them because they might have users outside of the cluster of 10128 // promoted nodes. 
10129 if (isa<ConstantSDNode>(Inputs[i])) 10130 continue; 10131 10132 SDValue InSrc = Inputs[i].getOperand(0); 10133 if (Inputs[i].getValueType() == N->getValueType(0)) 10134 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 10135 else if (N->getOpcode() == ISD::SIGN_EXTEND) 10136 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10137 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 10138 else if (N->getOpcode() == ISD::ZERO_EXTEND) 10139 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10140 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 10141 else 10142 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10143 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 10144 } 10145 10146 // Replace all operations (these are all the same, but have a different 10147 // (promoted) return type). DAG.getNode will validate that the types of 10148 // a binary operator match, so go through the list in reverse so that 10149 // we've likely promoted both operands first. 10150 while (!PromOps.empty()) { 10151 SDValue PromOp = PromOps.back(); 10152 PromOps.pop_back(); 10153 10154 unsigned C; 10155 switch (PromOp.getOpcode()) { 10156 default: C = 0; break; 10157 case ISD::SELECT: C = 1; break; 10158 case ISD::SELECT_CC: C = 2; break; 10159 } 10160 10161 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 10162 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 10163 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 10164 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 10165 // The to-be-promoted operands of this node have not yet been 10166 // promoted (this should be rare because we're going through the 10167 // list backward, but if one of the operands has several users in 10168 // this cluster of to-be-promoted nodes, it is possible). 10169 PromOps.insert(PromOps.begin(), PromOp); 10170 continue; 10171 } 10172 10173 // For SELECT and SELECT_CC nodes, we do a similar check for any 10174 // to-be-promoted comparison inputs. 10175 if (PromOp.getOpcode() == ISD::SELECT || 10176 PromOp.getOpcode() == ISD::SELECT_CC) { 10177 if ((SelectTruncOp[0].count(PromOp.getNode()) && 10178 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 10179 (SelectTruncOp[1].count(PromOp.getNode()) && 10180 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 10181 PromOps.insert(PromOps.begin(), PromOp); 10182 continue; 10183 } 10184 } 10185 10186 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 10187 PromOp.getNode()->op_end()); 10188 10189 // If this node has constant inputs, then they'll need to be promoted here. 10190 for (unsigned i = 0; i < 2; ++i) { 10191 if (!isa<ConstantSDNode>(Ops[C+i])) 10192 continue; 10193 if (Ops[C+i].getValueType() == N->getValueType(0)) 10194 continue; 10195 10196 if (N->getOpcode() == ISD::SIGN_EXTEND) 10197 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10198 else if (N->getOpcode() == ISD::ZERO_EXTEND) 10199 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10200 else 10201 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10202 } 10203 10204 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 10205 // truncate them again to the original value type. 
10206 if (PromOp.getOpcode() == ISD::SELECT || 10207 PromOp.getOpcode() == ISD::SELECT_CC) { 10208 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 10209 if (SI0 != SelectTruncOp[0].end()) 10210 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 10211 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 10212 if (SI1 != SelectTruncOp[1].end()) 10213 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 10214 } 10215 10216 DAG.ReplaceAllUsesOfValueWith(PromOp, 10217 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 10218 } 10219 10220 // Now we're left with the initial extension itself. 10221 if (!ReallyNeedsExt) 10222 return N->getOperand(0); 10223 10224 // To zero extend, just mask off everything except for the first bit (in the 10225 // i1 case). 10226 if (N->getOpcode() == ISD::ZERO_EXTEND) 10227 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), 10228 DAG.getConstant(APInt::getLowBitsSet( 10229 N->getValueSizeInBits(0), PromBits), 10230 dl, N->getValueType(0))); 10231 10232 assert(N->getOpcode() == ISD::SIGN_EXTEND && 10233 "Invalid extension type"); 10234 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); 10235 SDValue ShiftCst = 10236 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); 10237 return DAG.getNode( 10238 ISD::SRA, dl, N->getValueType(0), 10239 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), 10240 ShiftCst); 10241 } 10242 10243 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, 10244 DAGCombinerInfo &DCI) const { 10245 assert((N->getOpcode() == ISD::SINT_TO_FP || 10246 N->getOpcode() == ISD::UINT_TO_FP) && 10247 "Need an int -> FP conversion node here"); 10248 10249 if (!Subtarget.has64BitSupport()) 10250 return SDValue(); 10251 10252 SelectionDAG &DAG = DCI.DAG; 10253 SDLoc dl(N); 10254 SDValue Op(N, 0); 10255 10256 // Don't handle ppc_fp128 here or i1 conversions. 10257 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 10258 return SDValue(); 10259 if (Op.getOperand(0).getValueType() == MVT::i1) 10260 return SDValue(); 10261 10262 // For i32 intermediate values, unfortunately, the conversion functions 10263 // leave the upper 32 bits of the value undefined. Within the set of 10264 // scalar instructions, we have no method for zero- or sign-extending the 10265 // value. Thus, we cannot handle i32 intermediate values here. 10266 if (Op.getOperand(0).getValueType() == MVT::i32) 10267 return SDValue(); 10268 10269 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 10270 "UINT_TO_FP is supported only with FPCVT"); 10271 10272 // If we have FCFIDS, then use it when converting to single-precision. 10273 // Otherwise, convert to double-precision and then round. 10274 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10275 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 10276 : PPCISD::FCFIDS) 10277 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 10278 : PPCISD::FCFID); 10279 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10280 ? MVT::f32 10281 : MVT::f64; 10282 10283 // If we're converting from a float to an int and back to a float again, 10284 // then we don't need the store/load pair at all.
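// In that case the FP_TO_SINT/FP_TO_UINT below is re-expressed as an
// FCTID[U]Z feeding FCFID[U][S], so the value stays in a floating-point
// register for the whole round trip.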
10285 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && 10286 Subtarget.hasFPCVT()) || 10287 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { 10288 SDValue Src = Op.getOperand(0).getOperand(0); 10289 if (Src.getValueType() == MVT::f32) { 10290 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 10291 DCI.AddToWorklist(Src.getNode()); 10292 } else if (Src.getValueType() != MVT::f64) { 10293 // Make sure that we don't pick up a ppc_fp128 source value. 10294 return SDValue(); 10295 } 10296 10297 unsigned FCTOp = 10298 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 10299 PPCISD::FCTIDUZ; 10300 10301 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); 10302 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); 10303 10304 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { 10305 FP = DAG.getNode(ISD::FP_ROUND, dl, 10306 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 10307 DCI.AddToWorklist(FP.getNode()); 10308 } 10309 10310 return FP; 10311 } 10312 10313 return SDValue(); 10314 } 10315 10316 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for 10317 // builtins) into loads with swaps. 10318 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, 10319 DAGCombinerInfo &DCI) const { 10320 SelectionDAG &DAG = DCI.DAG; 10321 SDLoc dl(N); 10322 SDValue Chain; 10323 SDValue Base; 10324 MachineMemOperand *MMO; 10325 10326 switch (N->getOpcode()) { 10327 default: 10328 llvm_unreachable("Unexpected opcode for little endian VSX load"); 10329 case ISD::LOAD: { 10330 LoadSDNode *LD = cast<LoadSDNode>(N); 10331 Chain = LD->getChain(); 10332 Base = LD->getBasePtr(); 10333 MMO = LD->getMemOperand(); 10334 // If the MMO suggests this isn't a load of a full vector, leave 10335 // things alone. For a built-in, we have to make the change for 10336 // correctness, so if there is a size problem that will be a bug. 10337 if (MMO->getSize() < 16) 10338 return SDValue(); 10339 break; 10340 } 10341 case ISD::INTRINSIC_W_CHAIN: { 10342 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 10343 Chain = Intrin->getChain(); 10344 // Similarly to the store case below, Intrin->getBasePtr() doesn't get 10345 // us what we want. Get operand 2 instead. 10346 Base = Intrin->getOperand(2); 10347 MMO = Intrin->getMemOperand(); 10348 break; 10349 } 10350 } 10351 10352 MVT VecTy = N->getValueType(0).getSimpleVT(); 10353 SDValue LoadOps[] = { Chain, Base }; 10354 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 10355 DAG.getVTList(VecTy, MVT::Other), 10356 LoadOps, VecTy, MMO); 10357 DCI.AddToWorklist(Load.getNode()); 10358 Chain = Load.getValue(1); 10359 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 10360 DAG.getVTList(VecTy, MVT::Other), Chain, Load); 10361 DCI.AddToWorklist(Swap.getNode()); 10362 return Swap; 10363 } 10364 10365 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 10366 // builtins) into stores with swaps. 
10367 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 10368 DAGCombinerInfo &DCI) const { 10369 SelectionDAG &DAG = DCI.DAG; 10370 SDLoc dl(N); 10371 SDValue Chain; 10372 SDValue Base; 10373 unsigned SrcOpnd; 10374 MachineMemOperand *MMO; 10375 10376 switch (N->getOpcode()) { 10377 default: 10378 llvm_unreachable("Unexpected opcode for little endian VSX store"); 10379 case ISD::STORE: { 10380 StoreSDNode *ST = cast<StoreSDNode>(N); 10381 Chain = ST->getChain(); 10382 Base = ST->getBasePtr(); 10383 MMO = ST->getMemOperand(); 10384 SrcOpnd = 1; 10385 // If the MMO suggests this isn't a store of a full vector, leave 10386 // things alone. For a built-in, we have to make the change for 10387 // correctness, so if there is a size problem that will be a bug. 10388 if (MMO->getSize() < 16) 10389 return SDValue(); 10390 break; 10391 } 10392 case ISD::INTRINSIC_VOID: { 10393 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 10394 Chain = Intrin->getChain(); 10395 // Intrin->getBasePtr() oddly does not get what we want. 10396 Base = Intrin->getOperand(3); 10397 MMO = Intrin->getMemOperand(); 10398 SrcOpnd = 2; 10399 break; 10400 } 10401 } 10402 10403 SDValue Src = N->getOperand(SrcOpnd); 10404 MVT VecTy = Src.getValueType().getSimpleVT(); 10405 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 10406 DAG.getVTList(VecTy, MVT::Other), Chain, Src); 10407 DCI.AddToWorklist(Swap.getNode()); 10408 Chain = Swap.getValue(1); 10409 SDValue StoreOps[] = { Chain, Swap, Base }; 10410 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 10411 DAG.getVTList(MVT::Other), 10412 StoreOps, VecTy, MMO); 10413 DCI.AddToWorklist(Store.getNode()); 10414 return Store; 10415 } 10416 10417 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 10418 DAGCombinerInfo &DCI) const { 10419 SelectionDAG &DAG = DCI.DAG; 10420 SDLoc dl(N); 10421 switch (N->getOpcode()) { 10422 default: break; 10423 case PPCISD::SHL: 10424 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. 10425 return N->getOperand(0); 10426 break; 10427 case PPCISD::SRL: 10428 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. 10429 return N->getOperand(0); 10430 break; 10431 case PPCISD::SRA: 10432 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 10433 if (C->isNullValue() || // 0 >>s V -> 0. 10434 C->isAllOnesValue()) // -1 >>s V -> -1. 10435 return N->getOperand(0); 10436 } 10437 break; 10438 case ISD::SIGN_EXTEND: 10439 case ISD::ZERO_EXTEND: 10440 case ISD::ANY_EXTEND: 10441 return DAGCombineExtBoolTrunc(N, DCI); 10442 case ISD::TRUNCATE: 10443 case ISD::SETCC: 10444 case ISD::SELECT_CC: 10445 return DAGCombineTruncBoolExt(N, DCI); 10446 case ISD::SINT_TO_FP: 10447 case ISD::UINT_TO_FP: 10448 return combineFPToIntToFP(N, DCI); 10449 case ISD::STORE: { 10450 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
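// A sketch of the machine code this aims for (registers are arbitrary):
//   fctiwz f0, f1     # f64 -> i32, result in the low word of f0
//   stfiwx f0, 0, r3  # store that word straight from the FPR
// i.e. the i32 value never has to be moved into a GPR first.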
10451 if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
10452 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
10453 N->getOperand(1).getValueType() == MVT::i32 &&
10454 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
10455 SDValue Val = N->getOperand(1).getOperand(0);
10456 if (Val.getValueType() == MVT::f32) {
10457 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
10458 DCI.AddToWorklist(Val.getNode());
10459 }
10460 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
10461 DCI.AddToWorklist(Val.getNode());
10462
10463 SDValue Ops[] = {
10464 N->getOperand(0), Val, N->getOperand(2),
10465 DAG.getValueType(N->getOperand(1).getValueType())
10466 };
10467
10468 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
10469 DAG.getVTList(MVT::Other), Ops,
10470 cast<StoreSDNode>(N)->getMemoryVT(),
10471 cast<StoreSDNode>(N)->getMemOperand());
10472 DCI.AddToWorklist(Val.getNode());
10473 return Val;
10474 }
10475
10476 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
10477 if (cast<StoreSDNode>(N)->isUnindexed() &&
10478 N->getOperand(1).getOpcode() == ISD::BSWAP &&
10479 N->getOperand(1).getNode()->hasOneUse() &&
10480 (N->getOperand(1).getValueType() == MVT::i32 ||
10481 N->getOperand(1).getValueType() == MVT::i16 ||
10482 (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
10483 N->getOperand(1).getValueType() == MVT::i64))) {
10484 SDValue BSwapOp = N->getOperand(1).getOperand(0);
10485 // Do an any-extend to 32-bits if this is a half-word input.
10486 if (BSwapOp.getValueType() == MVT::i16)
10487 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
10488
10489 SDValue Ops[] = {
10490 N->getOperand(0), BSwapOp, N->getOperand(2),
10491 DAG.getValueType(N->getOperand(1).getValueType())
10492 };
10493 return
10494 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
10495 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
10496 cast<StoreSDNode>(N)->getMemOperand());
10497 }
10498
10499 // For little endian, VSX stores require generating xxswapd/stxvd2x.
10500 EVT VT = N->getOperand(1).getValueType();
10501 if (VT.isSimple()) {
10502 MVT StoreVT = VT.getSimpleVT();
10503 if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
10504 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
10505 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
10506 return expandVSXStoreForLE(N, DCI);
10507 }
10508 break;
10509 }
10510 case ISD::LOAD: {
10511 LoadSDNode *LD = cast<LoadSDNode>(N);
10512 EVT VT = LD->getValueType(0);
10513
10514 // For little endian, VSX loads require generating lxvd2x/xxswapd.
10515 if (VT.isSimple()) {
10516 MVT LoadVT = VT.getSimpleVT();
10517 if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
10518 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
10519 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
10520 return expandVSXLoadForLE(N, DCI);
10521 }
10522
10523 // We sometimes end up with a 64-bit integer load, from which we extract
10524 // two single-precision floating-point numbers. This happens with
10525 // std::complex<float>, and other similar structures, because of the way we
10526 // canonicalize structure copies. However, if we lack direct moves,
10527 // then the final bitcasts from the extracted integer values to the
10528 // floating-point numbers turn into store/load pairs. Even with direct moves,
10529 // just loading the two floating-point numbers is likely better.
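// Schematically (a sketch; the exact node pattern is matched in the lambda
// below), the goal is to rewrite
//   (f32 (bitcast (trunc (i64 load P)))),
//   (f32 (bitcast (trunc (srl (i64 load P), 32))))
// into two plain f32 loads from P and P+4, chained one after the other.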
10530 auto ReplaceTwoFloatLoad = [&]() { 10531 if (VT != MVT::i64) 10532 return false; 10533 10534 if (LD->getExtensionType() != ISD::NON_EXTLOAD || 10535 LD->isVolatile()) 10536 return false; 10537 10538 // We're looking for a sequence like this: 10539 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 10540 // t16: i64 = srl t13, Constant:i32<32> 10541 // t17: i32 = truncate t16 10542 // t18: f32 = bitcast t17 10543 // t19: i32 = truncate t13 10544 // t20: f32 = bitcast t19 10545 10546 if (!LD->hasNUsesOfValue(2, 0)) 10547 return false; 10548 10549 auto UI = LD->use_begin(); 10550 while (UI.getUse().getResNo() != 0) ++UI; 10551 SDNode *Trunc = *UI++; 10552 while (UI.getUse().getResNo() != 0) ++UI; 10553 SDNode *RightShift = *UI; 10554 if (Trunc->getOpcode() != ISD::TRUNCATE) 10555 std::swap(Trunc, RightShift); 10556 10557 if (Trunc->getOpcode() != ISD::TRUNCATE || 10558 Trunc->getValueType(0) != MVT::i32 || 10559 !Trunc->hasOneUse()) 10560 return false; 10561 if (RightShift->getOpcode() != ISD::SRL || 10562 !isa<ConstantSDNode>(RightShift->getOperand(1)) || 10563 RightShift->getConstantOperandVal(1) != 32 || 10564 !RightShift->hasOneUse()) 10565 return false; 10566 10567 SDNode *Trunc2 = *RightShift->use_begin(); 10568 if (Trunc2->getOpcode() != ISD::TRUNCATE || 10569 Trunc2->getValueType(0) != MVT::i32 || 10570 !Trunc2->hasOneUse()) 10571 return false; 10572 10573 SDNode *Bitcast = *Trunc->use_begin(); 10574 SDNode *Bitcast2 = *Trunc2->use_begin(); 10575 10576 if (Bitcast->getOpcode() != ISD::BITCAST || 10577 Bitcast->getValueType(0) != MVT::f32) 10578 return false; 10579 if (Bitcast2->getOpcode() != ISD::BITCAST || 10580 Bitcast2->getValueType(0) != MVT::f32) 10581 return false; 10582 10583 if (Subtarget.isLittleEndian()) 10584 std::swap(Bitcast, Bitcast2); 10585 10586 // Bitcast has the second float (in memory-layout order) and Bitcast2 10587 // has the first one. 10588 10589 SDValue BasePtr = LD->getBasePtr(); 10590 if (LD->isIndexed()) { 10591 assert(LD->getAddressingMode() == ISD::PRE_INC && 10592 "Non-pre-inc AM on PPC?"); 10593 BasePtr = 10594 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 10595 LD->getOffset()); 10596 } 10597 10598 SDValue FloatLoad = 10599 DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, 10600 LD->getPointerInfo(), false, LD->isNonTemporal(), 10601 LD->isInvariant(), LD->getAlignment(), LD->getAAInfo()); 10602 SDValue AddPtr = 10603 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), 10604 BasePtr, DAG.getIntPtrConstant(4, dl)); 10605 SDValue FloatLoad2 = 10606 DAG.getLoad(MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, 10607 LD->getPointerInfo().getWithOffset(4), false, 10608 LD->isNonTemporal(), LD->isInvariant(), 10609 MinAlign(LD->getAlignment(), 4), LD->getAAInfo()); 10610 10611 if (LD->isIndexed()) { 10612 // Note that DAGCombine should re-form any pre-increment load(s) from 10613 // what is produced here if that makes sense. 10614 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); 10615 } 10616 10617 DCI.CombineTo(Bitcast2, FloatLoad); 10618 DCI.CombineTo(Bitcast, FloatLoad2); 10619 10620 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 
2 : 1), 10621 SDValue(FloatLoad2.getNode(), 1)); 10622 return true; 10623 }; 10624 10625 if (ReplaceTwoFloatLoad()) 10626 return SDValue(N, 0); 10627 10628 EVT MemVT = LD->getMemoryVT(); 10629 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); 10630 unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); 10631 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); 10632 unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); 10633 if (LD->isUnindexed() && VT.isVector() && 10634 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && 10635 // P8 and later hardware should just use LOAD. 10636 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || 10637 VT == MVT::v4i32 || VT == MVT::v4f32)) || 10638 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && 10639 LD->getAlignment() >= ScalarABIAlignment)) && 10640 LD->getAlignment() < ABIAlignment) { 10641 // This is a type-legal unaligned Altivec or QPX load. 10642 SDValue Chain = LD->getChain(); 10643 SDValue Ptr = LD->getBasePtr(); 10644 bool isLittleEndian = Subtarget.isLittleEndian(); 10645 10646 // This implements the loading of unaligned vectors as described in 10647 // the venerable Apple Velocity Engine overview. Specifically: 10648 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 10649 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 10650 // 10651 // The general idea is to expand a sequence of one or more unaligned 10652 // loads into an alignment-based permutation-control instruction (lvsl 10653 // or lvsr), a series of regular vector loads (which always truncate 10654 // their input address to an aligned address), and a series of 10655 // permutations. The results of these permutations are the requested 10656 // loaded values. The trick is that the last "extra" load is not taken 10657 // from the address you might suspect (sizeof(vector) bytes after the 10658 // last requested load), but rather sizeof(vector) - 1 bytes after the 10659 // last requested vector. The point of this is to avoid a page fault if 10660 // the base address happened to be aligned. This works because if the 10661 // base address is aligned, then adding less than a full vector length 10662 // will cause the last vector in the sequence to be (re)loaded. 10663 // Otherwise, the next vector will be fetched as you might suspect was 10664 // necessary. 10665 10666 // We might be able to reuse the permutation generation from 10667 // a different base address offset from this one by an aligned amount. 10668 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 10669 // optimization later. 10670 Intrinsic::ID Intr, IntrLD, IntrPerm; 10671 MVT PermCntlTy, PermTy, LDTy; 10672 if (Subtarget.hasAltivec()) { 10673 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : 10674 Intrinsic::ppc_altivec_lvsl; 10675 IntrLD = Intrinsic::ppc_altivec_lvx; 10676 IntrPerm = Intrinsic::ppc_altivec_vperm; 10677 PermCntlTy = MVT::v16i8; 10678 PermTy = MVT::v4i32; 10679 LDTy = MVT::v4i32; 10680 } else { 10681 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : 10682 Intrinsic::ppc_qpx_qvlpcls; 10683 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : 10684 Intrinsic::ppc_qpx_qvlfs; 10685 IntrPerm = Intrinsic::ppc_qpx_qvfperm; 10686 PermCntlTy = MVT::v4f64; 10687 PermTy = MVT::v4f64; 10688 LDTy = MemVT.getSimpleVT(); 10689 } 10690 10691 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); 10692 10693 // Create the new MMO for the new base load. 
It is like the original MMO, 10694 // but represents an area in memory almost twice the vector size centered 10695 // on the original address. If the address is unaligned, we might start 10696 // reading up to (sizeof(vector)-1) bytes below the address of the 10697 // original unaligned load. 10698 MachineFunction &MF = DAG.getMachineFunction(); 10699 MachineMemOperand *BaseMMO = 10700 MF.getMachineMemOperand(LD->getMemOperand(), 10701 -(long)MemVT.getStoreSize()+1, 10702 2*MemVT.getStoreSize()-1); 10703 10704 // Create the new base load. 10705 SDValue LDXIntID = 10706 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); 10707 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; 10708 SDValue BaseLoad = 10709 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 10710 DAG.getVTList(PermTy, MVT::Other), 10711 BaseLoadOps, LDTy, BaseMMO); 10712 10713 // Note that the value of IncOffset (which is provided to the next 10714 // load's pointer info offset value, and thus used to calculate the 10715 // alignment), and the value of IncValue (which is actually used to 10716 // increment the pointer value) are different! This is because we 10717 // require the next load to appear to be aligned, even though it 10718 // is actually offset from the base pointer by a lesser amount. 10719 int IncOffset = VT.getSizeInBits() / 8; 10720 int IncValue = IncOffset; 10721 10722 // Walk (both up and down) the chain looking for another load at the real 10723 // (aligned) offset (the alignment of the other load does not matter in 10724 // this case). If found, then do not use the offset reduction trick, as 10725 // that will prevent the loads from being later combined (as they would 10726 // otherwise be duplicates). 10727 if (!findConsecutiveLoad(LD, DAG)) 10728 --IncValue; 10729 10730 SDValue Increment = 10731 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); 10732 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 10733 10734 MachineMemOperand *ExtraMMO = 10735 MF.getMachineMemOperand(LD->getMemOperand(), 10736 1, 2*MemVT.getStoreSize()-1); 10737 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; 10738 SDValue ExtraLoad = 10739 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 10740 DAG.getVTList(PermTy, MVT::Other), 10741 ExtraLoadOps, LDTy, ExtraMMO); 10742 10743 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 10744 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 10745 10746 // Because vperm has a big-endian bias, we must reverse the order 10747 // of the input vectors and complement the permute control vector 10748 // when generating little endian code. We have already handled the 10749 // latter by using lvsr instead of lvsl, so just reverse BaseLoad 10750 // and ExtraLoad here. 10751 SDValue Perm; 10752 if (isLittleEndian) 10753 Perm = BuildIntrinsicOp(IntrPerm, 10754 ExtraLoad, BaseLoad, PermCntl, DAG, dl); 10755 else 10756 Perm = BuildIntrinsicOp(IntrPerm, 10757 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 10758 10759 if (VT != PermTy) 10760 Perm = Subtarget.hasAltivec() ? 10761 DAG.getNode(ISD::BITCAST, dl, VT, Perm) : 10762 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX 10763 DAG.getTargetConstant(1, dl, MVT::i64)); 10764 // second argument is 1 because this rounding 10765 // is always exact. 10766 10767 // The output of the permutation is our loaded result, the TokenFactor is 10768 // our new chain. 
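// For reference, a sketch of the big-endian Altivec sequence this builds
// (register choices are arbitrary; r4 holds IncValue, computed above as 16,
// or 15 when no consecutive load was found):
//   lvsl  v2, 0, r3       # permute control derived from the low address bits
//   lvx   v0, 0, r3       # aligned load covering the start of the data
//   lvx   v1, r3, r4      # "extra" load
//   vperm v3, v0, v1, v2  # select the 16 requested bytes
// On little-endian targets lvsr is used and the vperm inputs are swapped, as
// handled above.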
10769 DCI.CombineTo(N, Perm, TF); 10770 return SDValue(N, 0); 10771 } 10772 } 10773 break; 10774 case ISD::INTRINSIC_WO_CHAIN: { 10775 bool isLittleEndian = Subtarget.isLittleEndian(); 10776 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 10777 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr 10778 : Intrinsic::ppc_altivec_lvsl); 10779 if ((IID == Intr || 10780 IID == Intrinsic::ppc_qpx_qvlpcld || 10781 IID == Intrinsic::ppc_qpx_qvlpcls) && 10782 N->getOperand(1)->getOpcode() == ISD::ADD) { 10783 SDValue Add = N->getOperand(1); 10784 10785 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 10786 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; 10787 10788 if (DAG.MaskedValueIsZero( 10789 Add->getOperand(1), 10790 APInt::getAllOnesValue(Bits /* alignment */) 10791 .zext( 10792 Add.getValueType().getScalarType().getSizeInBits()))) { 10793 SDNode *BasePtr = Add->getOperand(0).getNode(); 10794 for (SDNode::use_iterator UI = BasePtr->use_begin(), 10795 UE = BasePtr->use_end(); 10796 UI != UE; ++UI) { 10797 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 10798 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) { 10799 // We've found another LVSL/LVSR, and this address is an aligned 10800 // multiple of that one. The results will be the same, so use the 10801 // one we've just found instead. 10802 10803 return SDValue(*UI, 0); 10804 } 10805 } 10806 } 10807 10808 if (isa<ConstantSDNode>(Add->getOperand(1))) { 10809 SDNode *BasePtr = Add->getOperand(0).getNode(); 10810 for (SDNode::use_iterator UI = BasePtr->use_begin(), 10811 UE = BasePtr->use_end(); UI != UE; ++UI) { 10812 if (UI->getOpcode() == ISD::ADD && 10813 isa<ConstantSDNode>(UI->getOperand(1)) && 10814 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - 10815 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) % 10816 (1ULL << Bits) == 0) { 10817 SDNode *OtherAdd = *UI; 10818 for (SDNode::use_iterator VI = OtherAdd->use_begin(), 10819 VE = OtherAdd->use_end(); VI != VE; ++VI) { 10820 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 10821 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) { 10822 return SDValue(*VI, 0); 10823 } 10824 } 10825 } 10826 } 10827 } 10828 } 10829 } 10830 10831 break; 10832 case ISD::INTRINSIC_W_CHAIN: { 10833 // For little endian, VSX loads require generating lxvd2x/xxswapd. 10834 if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { 10835 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 10836 default: 10837 break; 10838 case Intrinsic::ppc_vsx_lxvw4x: 10839 case Intrinsic::ppc_vsx_lxvd2x: 10840 return expandVSXLoadForLE(N, DCI); 10841 } 10842 } 10843 break; 10844 } 10845 case ISD::INTRINSIC_VOID: { 10846 // For little endian, VSX stores require generating xxswapd/stxvd2x. 10847 if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { 10848 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 10849 default: 10850 break; 10851 case Intrinsic::ppc_vsx_stxvw4x: 10852 case Intrinsic::ppc_vsx_stxvd2x: 10853 return expandVSXStoreForLE(N, DCI); 10854 } 10855 } 10856 break; 10857 } 10858 case ISD::BSWAP: 10859 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 
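// For example (a sketch; operands are arbitrary), an i32 bswap of a loaded
// value folds into a single byte-reversed load:
//   lwbrx r3, 0, r4
// The i16 case uses lhbrx and re-truncates the i32 result, and on 64-bit
// targets with LDBRX the i64 case uses ldbrx.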
10860 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 10861 N->getOperand(0).hasOneUse() && 10862 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 10863 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 10864 N->getValueType(0) == MVT::i64))) { 10865 SDValue Load = N->getOperand(0); 10866 LoadSDNode *LD = cast<LoadSDNode>(Load); 10867 // Create the byte-swapping load. 10868 SDValue Ops[] = { 10869 LD->getChain(), // Chain 10870 LD->getBasePtr(), // Ptr 10871 DAG.getValueType(N->getValueType(0)) // VT 10872 }; 10873 SDValue BSLoad = 10874 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 10875 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 10876 MVT::i64 : MVT::i32, MVT::Other), 10877 Ops, LD->getMemoryVT(), LD->getMemOperand()); 10878 10879 // If this is an i16 load, insert the truncate. 10880 SDValue ResVal = BSLoad; 10881 if (N->getValueType(0) == MVT::i16) 10882 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 10883 10884 // First, combine the bswap away. This makes the value produced by the 10885 // load dead. 10886 DCI.CombineTo(N, ResVal); 10887 10888 // Next, combine the load away, we give it a bogus result value but a real 10889 // chain result. The result value is dead because the bswap is dead. 10890 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 10891 10892 // Return N so it doesn't get rechecked! 10893 return SDValue(N, 0); 10894 } 10895 10896 break; 10897 case PPCISD::VCMP: { 10898 // If a VCMPo node already exists with exactly the same operands as this 10899 // node, use its result instead of this node (VCMPo computes both a CR6 and 10900 // a normal output). 10901 // 10902 if (!N->getOperand(0).hasOneUse() && 10903 !N->getOperand(1).hasOneUse() && 10904 !N->getOperand(2).hasOneUse()) { 10905 10906 // Scan all of the users of the LHS, looking for VCMPo's that match. 10907 SDNode *VCMPoNode = nullptr; 10908 10909 SDNode *LHSN = N->getOperand(0).getNode(); 10910 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 10911 UI != E; ++UI) 10912 if (UI->getOpcode() == PPCISD::VCMPo && 10913 UI->getOperand(1) == N->getOperand(1) && 10914 UI->getOperand(2) == N->getOperand(2) && 10915 UI->getOperand(0) == N->getOperand(0)) { 10916 VCMPoNode = *UI; 10917 break; 10918 } 10919 10920 // If there is no VCMPo node, or if the flag value has a single use, don't 10921 // transform this. 10922 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 10923 break; 10924 10925 // Look at the (necessarily single) use of the flag value. If it has a 10926 // chain, this transformation is more complex. Note that multiple things 10927 // could use the value result, which we should ignore. 10928 SDNode *FlagUser = nullptr; 10929 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 10930 FlagUser == nullptr; ++UI) { 10931 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 10932 SDNode *User = *UI; 10933 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 10934 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 10935 FlagUser = User; 10936 break; 10937 } 10938 } 10939 } 10940 10941 // If the user is a MFOCRF instruction, we know this is safe. 10942 // Otherwise we give up for right now. 
10943 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 10944 return SDValue(VCMPoNode, 0); 10945 } 10946 break; 10947 } 10948 case ISD::BRCOND: { 10949 SDValue Cond = N->getOperand(1); 10950 SDValue Target = N->getOperand(2); 10951 10952 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 10953 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 10954 Intrinsic::ppc_is_decremented_ctr_nonzero) { 10955 10956 // We now need to make the intrinsic dead (it cannot be instruction 10957 // selected). 10958 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 10959 assert(Cond.getNode()->hasOneUse() && 10960 "Counter decrement has more than one use"); 10961 10962 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 10963 N->getOperand(0), Target); 10964 } 10965 } 10966 break; 10967 case ISD::BR_CC: { 10968 // If this is a branch on an altivec predicate comparison, lower this so 10969 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 10970 // lowering is done pre-legalize, because the legalizer lowers the predicate 10971 // compare down to code that is difficult to reassemble. 10972 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 10973 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 10974 10975 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 10976 // value. If so, pass-through the AND to get to the intrinsic. 10977 if (LHS.getOpcode() == ISD::AND && 10978 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 10979 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 10980 Intrinsic::ppc_is_decremented_ctr_nonzero && 10981 isa<ConstantSDNode>(LHS.getOperand(1)) && 10982 !isNullConstant(LHS.getOperand(1))) 10983 LHS = LHS.getOperand(0); 10984 10985 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 10986 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 10987 Intrinsic::ppc_is_decremented_ctr_nonzero && 10988 isa<ConstantSDNode>(RHS)) { 10989 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 10990 "Counter decrement comparison is not EQ or NE"); 10991 10992 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 10993 bool isBDNZ = (CC == ISD::SETEQ && Val) || 10994 (CC == ISD::SETNE && !Val); 10995 10996 // We now need to make the intrinsic dead (it cannot be instruction 10997 // selected). 10998 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 10999 assert(LHS.getNode()->hasOneUse() && 11000 "Counter decrement has more than one use"); 11001 11002 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 11003 N->getOperand(0), N->getOperand(4)); 11004 } 11005 11006 int CompareOpc; 11007 bool isDot; 11008 11009 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11010 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 11011 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { 11012 assert(isDot && "Can't compare against a vector result!"); 11013 11014 // If this is a comparison against something other than 0/1, then we know 11015 // that the condition is never/always true. 11016 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 11017 if (Val != 0 && Val != 1) { 11018 if (CC == ISD::SETEQ) // Cond never true, remove branch. 11019 return N->getOperand(0); 11020 // Always !=, turn it into an unconditional branch. 
11021 return DAG.getNode(ISD::BR, dl, MVT::Other, 11022 N->getOperand(0), N->getOperand(4)); 11023 } 11024 11025 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 11026 11027 // Create the PPCISD altivec 'dot' comparison node. 11028 SDValue Ops[] = { 11029 LHS.getOperand(2), // LHS of compare 11030 LHS.getOperand(3), // RHS of compare 11031 DAG.getConstant(CompareOpc, dl, MVT::i32) 11032 }; 11033 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 11034 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 11035 11036 // Unpack the result based on how the target uses it. 11037 PPC::Predicate CompOpc; 11038 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 11039 default: // Can't happen, don't crash on invalid number though. 11040 case 0: // Branch on the value of the EQ bit of CR6. 11041 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 11042 break; 11043 case 1: // Branch on the inverted value of the EQ bit of CR6. 11044 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 11045 break; 11046 case 2: // Branch on the value of the LT bit of CR6. 11047 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 11048 break; 11049 case 3: // Branch on the inverted value of the LT bit of CR6. 11050 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; 11051 break; 11052 } 11053 11054 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 11055 DAG.getConstant(CompOpc, dl, MVT::i32), 11056 DAG.getRegister(PPC::CR6, MVT::i32), 11057 N->getOperand(4), CompNode.getValue(1)); 11058 } 11059 break; 11060 } 11061 } 11062 11063 return SDValue(); 11064 } 11065 11066 SDValue 11067 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 11068 SelectionDAG &DAG, 11069 std::vector<SDNode *> *Created) const { 11070 // fold (sdiv X, pow2) 11071 EVT VT = N->getValueType(0); 11072 if (VT == MVT::i64 && !Subtarget.isPPC64()) 11073 return SDValue(); 11074 if ((VT != MVT::i32 && VT != MVT::i64) || 11075 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) 11076 return SDValue(); 11077 11078 SDLoc DL(N); 11079 SDValue N0 = N->getOperand(0); 11080 11081 bool IsNegPow2 = (-Divisor).isPowerOf2(); 11082 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); 11083 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); 11084 11085 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); 11086 if (Created) 11087 Created->push_back(Op.getNode()); 11088 11089 if (IsNegPow2) { 11090 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); 11091 if (Created) 11092 Created->push_back(Op.getNode()); 11093 } 11094 11095 return Op; 11096 } 11097 11098 //===----------------------------------------------------------------------===// 11099 // Inline Assembly Support 11100 //===----------------------------------------------------------------------===// 11101 11102 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 11103 APInt &KnownZero, 11104 APInt &KnownOne, 11105 const SelectionDAG &DAG, 11106 unsigned Depth) const { 11107 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 11108 switch (Op.getOpcode()) { 11109 default: break; 11110 case PPCISD::LBRX: { 11111 // lhbrx is known to have the top bits cleared out. 
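// (lhbrx zero-extends the 16-bit value it loads, so bits 16-31 of the i32
// result are always zero; that is what the 0xFFFF0000 mask below records.)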
11112 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 11113 KnownZero = 0xFFFF0000; 11114 break; 11115 } 11116 case ISD::INTRINSIC_WO_CHAIN: { 11117 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 11118 default: break; 11119 case Intrinsic::ppc_altivec_vcmpbfp_p: 11120 case Intrinsic::ppc_altivec_vcmpeqfp_p: 11121 case Intrinsic::ppc_altivec_vcmpequb_p: 11122 case Intrinsic::ppc_altivec_vcmpequh_p: 11123 case Intrinsic::ppc_altivec_vcmpequw_p: 11124 case Intrinsic::ppc_altivec_vcmpequd_p: 11125 case Intrinsic::ppc_altivec_vcmpgefp_p: 11126 case Intrinsic::ppc_altivec_vcmpgtfp_p: 11127 case Intrinsic::ppc_altivec_vcmpgtsb_p: 11128 case Intrinsic::ppc_altivec_vcmpgtsh_p: 11129 case Intrinsic::ppc_altivec_vcmpgtsw_p: 11130 case Intrinsic::ppc_altivec_vcmpgtsd_p: 11131 case Intrinsic::ppc_altivec_vcmpgtub_p: 11132 case Intrinsic::ppc_altivec_vcmpgtuh_p: 11133 case Intrinsic::ppc_altivec_vcmpgtuw_p: 11134 case Intrinsic::ppc_altivec_vcmpgtud_p: 11135 KnownZero = ~1U; // All bits but the low one are known to be zero. 11136 break; 11137 } 11138 } 11139 } 11140 } 11141 11142 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { 11143 switch (Subtarget.getDarwinDirective()) { 11144 default: break; 11145 case PPC::DIR_970: 11146 case PPC::DIR_PWR4: 11147 case PPC::DIR_PWR5: 11148 case PPC::DIR_PWR5X: 11149 case PPC::DIR_PWR6: 11150 case PPC::DIR_PWR6X: 11151 case PPC::DIR_PWR7: 11152 case PPC::DIR_PWR8: { 11153 if (!ML) 11154 break; 11155 11156 const PPCInstrInfo *TII = Subtarget.getInstrInfo(); 11157 11158 // For small loops (between 5 and 8 instructions), align to a 32-byte 11159 // boundary so that the entire loop fits in one instruction-cache line. 11160 uint64_t LoopSize = 0; 11161 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) 11162 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { 11163 LoopSize += TII->GetInstSizeInBytes(J); 11164 if (LoopSize > 32) 11165 break; 11166 } 11167 11168 if (LoopSize > 16 && LoopSize <= 32) 11169 return 5; 11170 11171 break; 11172 } 11173 } 11174 11175 return TargetLowering::getPrefLoopAlignment(ML); 11176 } 11177 11178 /// getConstraintType - Given a constraint, return the type of 11179 /// constraint it is for this target. 11180 PPCTargetLowering::ConstraintType 11181 PPCTargetLowering::getConstraintType(StringRef Constraint) const { 11182 if (Constraint.size() == 1) { 11183 switch (Constraint[0]) { 11184 default: break; 11185 case 'b': 11186 case 'r': 11187 case 'f': 11188 case 'd': 11189 case 'v': 11190 case 'y': 11191 return C_RegisterClass; 11192 case 'Z': 11193 // FIXME: While Z does indicate a memory constraint, it specifically 11194 // indicates an r+r address (used in conjunction with the 'y' modifier 11195 // in the replacement string). Currently, we're forcing the base 11196 // register to be r0 in the asm printer (which is interpreted as zero) 11197 // and forming the complete address in the second register. This is 11198 // suboptimal. 11199 return C_Memory; 11200 } 11201 } else if (Constraint == "wc") { // individual CR bits. 11202 return C_RegisterClass; 11203 } else if (Constraint == "wa" || Constraint == "wd" || 11204 Constraint == "wf" || Constraint == "ws") { 11205 return C_RegisterClass; // VSX registers. 11206 } 11207 return TargetLowering::getConstraintType(Constraint); 11208 } 11209 11210 /// Examine constraint type and operand type and determine a weight value. 
11211 /// This object must already have been set up with the operand type 11212 /// and the current alternative constraint selected. 11213 TargetLowering::ConstraintWeight 11214 PPCTargetLowering::getSingleConstraintMatchWeight( 11215 AsmOperandInfo &info, const char *constraint) const { 11216 ConstraintWeight weight = CW_Invalid; 11217 Value *CallOperandVal = info.CallOperandVal; 11218 // If we don't have a value, we can't do a match, 11219 // but allow it at the lowest weight. 11220 if (!CallOperandVal) 11221 return CW_Default; 11222 Type *type = CallOperandVal->getType(); 11223 11224 // Look at the constraint type. 11225 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 11226 return CW_Register; // an individual CR bit. 11227 else if ((StringRef(constraint) == "wa" || 11228 StringRef(constraint) == "wd" || 11229 StringRef(constraint) == "wf") && 11230 type->isVectorTy()) 11231 return CW_Register; 11232 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 11233 return CW_Register; 11234 11235 switch (*constraint) { 11236 default: 11237 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 11238 break; 11239 case 'b': 11240 if (type->isIntegerTy()) 11241 weight = CW_Register; 11242 break; 11243 case 'f': 11244 if (type->isFloatTy()) 11245 weight = CW_Register; 11246 break; 11247 case 'd': 11248 if (type->isDoubleTy()) 11249 weight = CW_Register; 11250 break; 11251 case 'v': 11252 if (type->isVectorTy()) 11253 weight = CW_Register; 11254 break; 11255 case 'y': 11256 weight = CW_Register; 11257 break; 11258 case 'Z': 11259 weight = CW_Memory; 11260 break; 11261 } 11262 return weight; 11263 } 11264 11265 std::pair<unsigned, const TargetRegisterClass *> 11266 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 11267 StringRef Constraint, 11268 MVT VT) const { 11269 if (Constraint.size() == 1) { 11270 // GCC RS6000 Constraint Letters 11271 switch (Constraint[0]) { 11272 case 'b': // R1-R31 11273 if (VT == MVT::i64 && Subtarget.isPPC64()) 11274 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 11275 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 11276 case 'r': // R0-R31 11277 if (VT == MVT::i64 && Subtarget.isPPC64()) 11278 return std::make_pair(0U, &PPC::G8RCRegClass); 11279 return std::make_pair(0U, &PPC::GPRCRegClass); 11280 // 'd' and 'f' constraints are both defined to be "the floating point 11281 // registers", where one is for 32-bit and the other for 64-bit. We don't 11282 // really care overly much here so just give them all the same reg classes. 11283 case 'd': 11284 case 'f': 11285 if (VT == MVT::f32 || VT == MVT::i32) 11286 return std::make_pair(0U, &PPC::F4RCRegClass); 11287 if (VT == MVT::f64 || VT == MVT::i64) 11288 return std::make_pair(0U, &PPC::F8RCRegClass); 11289 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 11290 return std::make_pair(0U, &PPC::QFRCRegClass); 11291 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 11292 return std::make_pair(0U, &PPC::QSRCRegClass); 11293 break; 11294 case 'v': 11295 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 11296 return std::make_pair(0U, &PPC::QFRCRegClass); 11297 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 11298 return std::make_pair(0U, &PPC::QSRCRegClass); 11299 if (Subtarget.hasAltivec()) 11300 return std::make_pair(0U, &PPC::VRRCRegClass); 11301 case 'y': // crrc 11302 return std::make_pair(0U, &PPC::CRRCRegClass); 11303 } 11304 } else if (Constraint == "wc" && Subtarget.useCRBits()) { 11305 // An individual CR bit. 
11306 return std::make_pair(0U, &PPC::CRBITRCRegClass); 11307 } else if ((Constraint == "wa" || Constraint == "wd" || 11308 Constraint == "wf") && Subtarget.hasVSX()) { 11309 return std::make_pair(0U, &PPC::VSRCRegClass); 11310 } else if (Constraint == "ws" && Subtarget.hasVSX()) { 11311 if (VT == MVT::f32 && Subtarget.hasP8Vector()) 11312 return std::make_pair(0U, &PPC::VSSRCRegClass); 11313 else 11314 return std::make_pair(0U, &PPC::VSFRCRegClass); 11315 } 11316 11317 std::pair<unsigned, const TargetRegisterClass *> R = 11318 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 11319 11320 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 11321 // (which we call X[0-9]+). If a 64-bit value has been requested, and a 11322 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 11323 // register. 11324 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 11325 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 11326 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && 11327 PPC::GPRCRegClass.contains(R.first)) 11328 return std::make_pair(TRI->getMatchingSuperReg(R.first, 11329 PPC::sub_32, &PPC::G8RCRegClass), 11330 &PPC::G8RCRegClass); 11331 11332 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 11333 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { 11334 R.first = PPC::CR0; 11335 R.second = &PPC::CRRCRegClass; 11336 } 11337 11338 return R; 11339 } 11340 11341 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 11342 /// vector. If it is invalid, don't add anything to Ops. 11343 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 11344 std::string &Constraint, 11345 std::vector<SDValue>&Ops, 11346 SelectionDAG &DAG) const { 11347 SDValue Result; 11348 11349 // Only support length 1 constraints. 11350 if (Constraint.length() > 1) return; 11351 11352 char Letter = Constraint[0]; 11353 switch (Letter) { 11354 default: break; 11355 case 'I': 11356 case 'J': 11357 case 'K': 11358 case 'L': 11359 case 'M': 11360 case 'N': 11361 case 'O': 11362 case 'P': { 11363 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 11364 if (!CST) return; // Must be an immediate to match. 11365 SDLoc dl(Op); 11366 int64_t Value = CST->getSExtValue(); 11367 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative 11368 // numbers are printed as such. 11369 switch (Letter) { 11370 default: llvm_unreachable("Unknown constraint letter!"); 11371 case 'I': // "I" is a signed 16-bit constant. 11372 if (isInt<16>(Value)) 11373 Result = DAG.getTargetConstant(Value, dl, TCVT); 11374 break; 11375 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 11376 if (isShiftedUInt<16, 16>(Value)) 11377 Result = DAG.getTargetConstant(Value, dl, TCVT); 11378 break; 11379 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 11380 if (isShiftedInt<16, 16>(Value)) 11381 Result = DAG.getTargetConstant(Value, dl, TCVT); 11382 break; 11383 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 11384 if (isUInt<16>(Value)) 11385 Result = DAG.getTargetConstant(Value, dl, TCVT); 11386 break; 11387 case 'M': // "M" is a constant that is greater than 31. 11388 if (Value > 31) 11389 Result = DAG.getTargetConstant(Value, dl, TCVT); 11390 break; 11391 case 'N': // "N" is a positive constant that is an exact power of two. 
11392 if (Value > 0 && isPowerOf2_64(Value)) 11393 Result = DAG.getTargetConstant(Value, dl, TCVT); 11394 break; 11395 case 'O': // "O" is the constant zero. 11396 if (Value == 0) 11397 Result = DAG.getTargetConstant(Value, dl, TCVT); 11398 break; 11399 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 11400 if (isInt<16>(-Value)) 11401 Result = DAG.getTargetConstant(Value, dl, TCVT); 11402 break; 11403 } 11404 break; 11405 } 11406 } 11407 11408 if (Result.getNode()) { 11409 Ops.push_back(Result); 11410 return; 11411 } 11412 11413 // Handle standard constraint letters. 11414 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 11415 } 11416 11417 // isLegalAddressingMode - Return true if the addressing mode represented 11418 // by AM is legal for this target, for a load/store of the specified type. 11419 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, 11420 const AddrMode &AM, Type *Ty, 11421 unsigned AS) const { 11422 // PPC does not allow r+i addressing modes for vectors! 11423 if (Ty->isVectorTy() && AM.BaseOffs != 0) 11424 return false; 11425 11426 // PPC allows a sign-extended 16-bit immediate field. 11427 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 11428 return false; 11429 11430 // No global is ever allowed as a base. 11431 if (AM.BaseGV) 11432 return false; 11433 11434 // PPC only support r+r, 11435 switch (AM.Scale) { 11436 case 0: // "r+i" or just "i", depending on HasBaseReg. 11437 break; 11438 case 1: 11439 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 11440 return false; 11441 // Otherwise we have r+r or r+i. 11442 break; 11443 case 2: 11444 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 11445 return false; 11446 // Allow 2*r as r+r. 11447 break; 11448 default: 11449 // No other scales are supported. 11450 return false; 11451 } 11452 11453 return true; 11454 } 11455 11456 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 11457 SelectionDAG &DAG) const { 11458 MachineFunction &MF = DAG.getMachineFunction(); 11459 MachineFrameInfo *MFI = MF.getFrameInfo(); 11460 MFI->setReturnAddressIsTaken(true); 11461 11462 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 11463 return SDValue(); 11464 11465 SDLoc dl(Op); 11466 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 11467 11468 // Make sure the function does not optimize away the store of the RA to 11469 // the stack. 11470 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 11471 FuncInfo->setLRStoreRequired(); 11472 bool isPPC64 = Subtarget.isPPC64(); 11473 auto PtrVT = getPointerTy(MF.getDataLayout()); 11474 11475 if (Depth > 0) { 11476 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 11477 SDValue Offset = 11478 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, 11479 isPPC64 ? MVT::i64 : MVT::i32); 11480 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 11481 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), 11482 MachinePointerInfo(), false, false, false, 0); 11483 } 11484 11485 // Just load the return address off the stack. 
11486 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 11487 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, 11488 MachinePointerInfo(), false, false, false, 0); 11489 } 11490 11491 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 11492 SelectionDAG &DAG) const { 11493 SDLoc dl(Op); 11494 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 11495 11496 MachineFunction &MF = DAG.getMachineFunction(); 11497 MachineFrameInfo *MFI = MF.getFrameInfo(); 11498 MFI->setFrameAddressIsTaken(true); 11499 11500 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); 11501 bool isPPC64 = PtrVT == MVT::i64; 11502 11503 // Naked functions never have a frame pointer, and so we use r1. For all 11504 // other functions, this decision must be delayed until during PEI. 11505 unsigned FrameReg; 11506 if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) 11507 FrameReg = isPPC64 ? PPC::X1 : PPC::R1; 11508 else 11509 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; 11510 11511 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 11512 PtrVT); 11513 while (Depth--) 11514 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 11515 FrameAddr, MachinePointerInfo(), false, false, 11516 false, 0); 11517 return FrameAddr; 11518 } 11519 11520 // FIXME? Maybe this could be a TableGen attribute on some registers and 11521 // this table could be generated automatically from RegInfo. 11522 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, 11523 SelectionDAG &DAG) const { 11524 bool isPPC64 = Subtarget.isPPC64(); 11525 bool isDarwinABI = Subtarget.isDarwinABI(); 11526 11527 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || 11528 (!isPPC64 && VT != MVT::i32)) 11529 report_fatal_error("Invalid register global variable type"); 11530 11531 bool is64Bit = isPPC64 && VT == MVT::i64; 11532 unsigned Reg = StringSwitch<unsigned>(RegName) 11533 .Case("r1", is64Bit ? PPC::X1 : PPC::R1) 11534 .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) 11535 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : 11536 (is64Bit ? PPC::X13 : PPC::R13)) 11537 .Default(0); 11538 11539 if (Reg) 11540 return Reg; 11541 report_fatal_error("Invalid register name global variable"); 11542 } 11543 11544 bool 11545 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 11546 // The PowerPC target isn't yet aware of offsets. 
11547 return false; 11548 } 11549 11550 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 11551 const CallInst &I, 11552 unsigned Intrinsic) const { 11553 11554 switch (Intrinsic) { 11555 case Intrinsic::ppc_qpx_qvlfd: 11556 case Intrinsic::ppc_qpx_qvlfs: 11557 case Intrinsic::ppc_qpx_qvlfcd: 11558 case Intrinsic::ppc_qpx_qvlfcs: 11559 case Intrinsic::ppc_qpx_qvlfiwa: 11560 case Intrinsic::ppc_qpx_qvlfiwz: 11561 case Intrinsic::ppc_altivec_lvx: 11562 case Intrinsic::ppc_altivec_lvxl: 11563 case Intrinsic::ppc_altivec_lvebx: 11564 case Intrinsic::ppc_altivec_lvehx: 11565 case Intrinsic::ppc_altivec_lvewx: 11566 case Intrinsic::ppc_vsx_lxvd2x: 11567 case Intrinsic::ppc_vsx_lxvw4x: { 11568 EVT VT; 11569 switch (Intrinsic) { 11570 case Intrinsic::ppc_altivec_lvebx: 11571 VT = MVT::i8; 11572 break; 11573 case Intrinsic::ppc_altivec_lvehx: 11574 VT = MVT::i16; 11575 break; 11576 case Intrinsic::ppc_altivec_lvewx: 11577 VT = MVT::i32; 11578 break; 11579 case Intrinsic::ppc_vsx_lxvd2x: 11580 VT = MVT::v2f64; 11581 break; 11582 case Intrinsic::ppc_qpx_qvlfd: 11583 VT = MVT::v4f64; 11584 break; 11585 case Intrinsic::ppc_qpx_qvlfs: 11586 VT = MVT::v4f32; 11587 break; 11588 case Intrinsic::ppc_qpx_qvlfcd: 11589 VT = MVT::v2f64; 11590 break; 11591 case Intrinsic::ppc_qpx_qvlfcs: 11592 VT = MVT::v2f32; 11593 break; 11594 default: 11595 VT = MVT::v4i32; 11596 break; 11597 } 11598 11599 Info.opc = ISD::INTRINSIC_W_CHAIN; 11600 Info.memVT = VT; 11601 Info.ptrVal = I.getArgOperand(0); 11602 Info.offset = -VT.getStoreSize()+1; 11603 Info.size = 2*VT.getStoreSize()-1; 11604 Info.align = 1; 11605 Info.vol = false; 11606 Info.readMem = true; 11607 Info.writeMem = false; 11608 return true; 11609 } 11610 case Intrinsic::ppc_qpx_qvlfda: 11611 case Intrinsic::ppc_qpx_qvlfsa: 11612 case Intrinsic::ppc_qpx_qvlfcda: 11613 case Intrinsic::ppc_qpx_qvlfcsa: 11614 case Intrinsic::ppc_qpx_qvlfiwaa: 11615 case Intrinsic::ppc_qpx_qvlfiwza: { 11616 EVT VT; 11617 switch (Intrinsic) { 11618 case Intrinsic::ppc_qpx_qvlfda: 11619 VT = MVT::v4f64; 11620 break; 11621 case Intrinsic::ppc_qpx_qvlfsa: 11622 VT = MVT::v4f32; 11623 break; 11624 case Intrinsic::ppc_qpx_qvlfcda: 11625 VT = MVT::v2f64; 11626 break; 11627 case Intrinsic::ppc_qpx_qvlfcsa: 11628 VT = MVT::v2f32; 11629 break; 11630 default: 11631 VT = MVT::v4i32; 11632 break; 11633 } 11634 11635 Info.opc = ISD::INTRINSIC_W_CHAIN; 11636 Info.memVT = VT; 11637 Info.ptrVal = I.getArgOperand(0); 11638 Info.offset = 0; 11639 Info.size = VT.getStoreSize(); 11640 Info.align = 1; 11641 Info.vol = false; 11642 Info.readMem = true; 11643 Info.writeMem = false; 11644 return true; 11645 } 11646 case Intrinsic::ppc_qpx_qvstfd: 11647 case Intrinsic::ppc_qpx_qvstfs: 11648 case Intrinsic::ppc_qpx_qvstfcd: 11649 case Intrinsic::ppc_qpx_qvstfcs: 11650 case Intrinsic::ppc_qpx_qvstfiw: 11651 case Intrinsic::ppc_altivec_stvx: 11652 case Intrinsic::ppc_altivec_stvxl: 11653 case Intrinsic::ppc_altivec_stvebx: 11654 case Intrinsic::ppc_altivec_stvehx: 11655 case Intrinsic::ppc_altivec_stvewx: 11656 case Intrinsic::ppc_vsx_stxvd2x: 11657 case Intrinsic::ppc_vsx_stxvw4x: { 11658 EVT VT; 11659 switch (Intrinsic) { 11660 case Intrinsic::ppc_altivec_stvebx: 11661 VT = MVT::i8; 11662 break; 11663 case Intrinsic::ppc_altivec_stvehx: 11664 VT = MVT::i16; 11665 break; 11666 case Intrinsic::ppc_altivec_stvewx: 11667 VT = MVT::i32; 11668 break; 11669 case Intrinsic::ppc_vsx_stxvd2x: 11670 VT = MVT::v2f64; 11671 break; 11672 case Intrinsic::ppc_qpx_qvstfd: 11673 VT = MVT::v4f64; 11674 break; 11675 
case Intrinsic::ppc_qpx_qvstfs:
11676 VT = MVT::v4f32;
11677 break;
11678 case Intrinsic::ppc_qpx_qvstfcd:
11679 VT = MVT::v2f64;
11680 break;
11681 case Intrinsic::ppc_qpx_qvstfcs:
11682 VT = MVT::v2f32;
11683 break;
11684 default:
11685 VT = MVT::v4i32;
11686 break;
11687 }
11688
11689 Info.opc = ISD::INTRINSIC_VOID;
11690 Info.memVT = VT;
11691 Info.ptrVal = I.getArgOperand(1);
11692 Info.offset = -VT.getStoreSize()+1;
11693 Info.size = 2*VT.getStoreSize()-1;
11694 Info.align = 1;
11695 Info.vol = false;
11696 Info.readMem = false;
11697 Info.writeMem = true;
11698 return true;
11699 }
11700 case Intrinsic::ppc_qpx_qvstfda:
11701 case Intrinsic::ppc_qpx_qvstfsa:
11702 case Intrinsic::ppc_qpx_qvstfcda:
11703 case Intrinsic::ppc_qpx_qvstfcsa:
11704 case Intrinsic::ppc_qpx_qvstfiwa: {
11705 EVT VT;
11706 switch (Intrinsic) {
11707 case Intrinsic::ppc_qpx_qvstfda:
11708 VT = MVT::v4f64;
11709 break;
11710 case Intrinsic::ppc_qpx_qvstfsa:
11711 VT = MVT::v4f32;
11712 break;
11713 case Intrinsic::ppc_qpx_qvstfcda:
11714 VT = MVT::v2f64;
11715 break;
11716 case Intrinsic::ppc_qpx_qvstfcsa:
11717 VT = MVT::v2f32;
11718 break;
11719 default:
11720 VT = MVT::v4i32;
11721 break;
11722 }
11723
11724 Info.opc = ISD::INTRINSIC_VOID;
11725 Info.memVT = VT;
11726 Info.ptrVal = I.getArgOperand(1);
11727 Info.offset = 0;
11728 Info.size = VT.getStoreSize();
11729 Info.align = 1;
11730 Info.vol = false;
11731 Info.readMem = false;
11732 Info.writeMem = true;
11733 return true;
11734 }
11735 default:
11736 break;
11737 }
11738
11739 return false;
11740 }
11741
11742 /// getOptimalMemOpType - Returns the target specific optimal type for load
11743 /// and store operations as a result of memset, memcpy, and memmove
11744 /// lowering. If DstAlign is zero, that means the destination alignment can
11745 /// satisfy any constraint. Similarly, if SrcAlign is zero, there is no need
11746 /// to check it against an alignment requirement, probably because the source
11747 /// does not need to be loaded. If 'IsMemset' is
11748 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
11749 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
11750 /// source is constant so it does not need to be loaded.
11751 /// It returns EVT::Other if the type should be determined using generic
11752 /// target-independent logic.
11753 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
11754 unsigned DstAlign, unsigned SrcAlign,
11755 bool IsMemset, bool ZeroMemset,
11756 bool MemcpyStrSrc,
11757 MachineFunction &MF) const {
11758 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
11759 const Function *F = MF.getFunction();
11760 // When expanding a memset, require at least two QPX instructions to cover
11761 // the cost of loading the value to be stored from the constant pool.
11762 if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
11763 (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
11764 !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
11765 return MVT::v4f64;
11766 }
11767
11768 // We should use Altivec/VSX loads and stores when available. For unaligned
11769 // addresses, unaligned VSX loads are only fast starting with the P8.
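// For instance (a sketch, not a measured result): on an Altivec/VSX target
// these rules expand a 64-byte aligned memcpy as four 16-byte v4i32 copies
// instead of eight 8-byte i64 copies.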
11770 if (Subtarget.hasAltivec() && Size >= 16 && 11771 (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || 11772 ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) 11773 return MVT::v4i32; 11774 } 11775 11776 if (Subtarget.isPPC64()) { 11777 return MVT::i64; 11778 } 11779 11780 return MVT::i32; 11781 } 11782 11783 /// \brief Returns true if it is beneficial to convert a load of a constant 11784 /// to just the constant itself. 11785 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 11786 Type *Ty) const { 11787 assert(Ty->isIntegerTy()); 11788 11789 unsigned BitSize = Ty->getPrimitiveSizeInBits(); 11790 return !(BitSize == 0 || BitSize > 64); 11791 } 11792 11793 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { 11794 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 11795 return false; 11796 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 11797 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 11798 return NumBits1 == 64 && NumBits2 == 32; 11799 } 11800 11801 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { 11802 if (!VT1.isInteger() || !VT2.isInteger()) 11803 return false; 11804 unsigned NumBits1 = VT1.getSizeInBits(); 11805 unsigned NumBits2 = VT2.getSizeInBits(); 11806 return NumBits1 == 64 && NumBits2 == 32; 11807 } 11808 11809 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 11810 // Generally speaking, zexts are not free, but they are free when they can be 11811 // folded with other operations. 11812 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) { 11813 EVT MemVT = LD->getMemoryVT(); 11814 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || 11815 (Subtarget.isPPC64() && MemVT == MVT::i32)) && 11816 (LD->getExtensionType() == ISD::NON_EXTLOAD || 11817 LD->getExtensionType() == ISD::ZEXTLOAD)) 11818 return true; 11819 } 11820 11821 // FIXME: Add other cases... 11822 // - 32-bit shifts with a zext to i64 11823 // - zext after ctlz, bswap, etc. 11824 // - zext after and by a constant mask 11825 11826 return TargetLowering::isZExtFree(Val, VT2); 11827 } 11828 11829 bool PPCTargetLowering::isFPExtFree(EVT VT) const { 11830 assert(VT.isFloatingPoint()); 11831 return true; 11832 } 11833 11834 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 11835 return isInt<16>(Imm) || isUInt<16>(Imm); 11836 } 11837 11838 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { 11839 return isInt<16>(Imm) || isUInt<16>(Imm); 11840 } 11841 11842 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 11843 unsigned, 11844 unsigned, 11845 bool *Fast) const { 11846 if (DisablePPCUnaligned) 11847 return false; 11848 11849 // PowerPC supports unaligned memory access for simple non-vector types. 11850 // Although accessing unaligned addresses is not as efficient as accessing 11851 // aligned addresses, it is generally more efficient than manual expansion, 11852 // and generally only traps for software emulation when crossing page 11853 // boundaries. 
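// For example (sketch only), an unaligned i32 load is still emitted as a
// single lwz rather than being expanded into four lbz loads plus shifts and
// ors, even though the lwz may be slower when it straddles a boundary.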
11854 11855 if (!VT.isSimple()) 11856 return false; 11857 11858 if (VT.getSimpleVT().isVector()) { 11859 if (Subtarget.hasVSX()) { 11860 if (VT != MVT::v2f64 && VT != MVT::v2i64 && 11861 VT != MVT::v4f32 && VT != MVT::v4i32) 11862 return false; 11863 } else { 11864 return false; 11865 } 11866 } 11867 11868 if (VT == MVT::ppcf128) 11869 return false; 11870 11871 if (Fast) 11872 *Fast = true; 11873 11874 return true; 11875 } 11876 11877 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 11878 VT = VT.getScalarType(); 11879 11880 if (!VT.isSimple()) 11881 return false; 11882 11883 switch (VT.getSimpleVT().SimpleTy) { 11884 case MVT::f32: 11885 case MVT::f64: 11886 return true; 11887 default: 11888 break; 11889 } 11890 11891 return false; 11892 } 11893 11894 const MCPhysReg * 11895 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { 11896 // LR is a callee-save register, but we must treat it as clobbered by any call 11897 // site. Hence we include LR in the scratch registers, which are in turn added 11898 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies 11899 // to CTR, which is used by any indirect call. 11900 static const MCPhysReg ScratchRegs[] = { 11901 PPC::X12, PPC::LR8, PPC::CTR8, 0 11902 }; 11903 11904 return ScratchRegs; 11905 } 11906 11907 unsigned PPCTargetLowering::getExceptionPointerRegister( 11908 const Constant *PersonalityFn) const { 11909 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; 11910 } 11911 11912 unsigned PPCTargetLowering::getExceptionSelectorRegister( 11913 const Constant *PersonalityFn) const { 11914 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; 11915 } 11916 11917 bool 11918 PPCTargetLowering::shouldExpandBuildVectorWithShuffles( 11919 EVT VT , unsigned DefinedValues) const { 11920 if (VT == MVT::v2i64) 11921 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves 11922 11923 if (Subtarget.hasQPX()) { 11924 if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1) 11925 return true; 11926 } 11927 11928 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); 11929 } 11930 11931 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 11932 if (DisableILPPref || Subtarget.enableMachineScheduler()) 11933 return TargetLowering::getSchedulingPreference(N); 11934 11935 return Sched::ILP; 11936 } 11937 11938 // Create a fast isel object. 
11939 FastISel * 11940 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, 11941 const TargetLibraryInfo *LibInfo) const { 11942 return PPC::createFastISel(FuncInfo, LibInfo); 11943 } 11944 11945 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { 11946 if (Subtarget.isDarwinABI()) return; 11947 if (!Subtarget.isPPC64()) return; 11948 11949 // Update IsSplitCSR in PPCFunctionInfo 11950 PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>(); 11951 PFI->setIsSplitCSR(true); 11952 } 11953 11954 void PPCTargetLowering::insertCopiesSplitCSR( 11955 MachineBasicBlock *Entry, 11956 const SmallVectorImpl<MachineBasicBlock *> &Exits) const { 11957 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 11958 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); 11959 if (!IStart) 11960 return; 11961 11962 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 11963 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); 11964 MachineBasicBlock::iterator MBBI = Entry->begin(); 11965 for (const MCPhysReg *I = IStart; *I; ++I) { 11966 const TargetRegisterClass *RC = nullptr; 11967 if (PPC::G8RCRegClass.contains(*I)) 11968 RC = &PPC::G8RCRegClass; 11969 else if (PPC::F8RCRegClass.contains(*I)) 11970 RC = &PPC::F8RCRegClass; 11971 else if (PPC::CRRCRegClass.contains(*I)) 11972 RC = &PPC::CRRCRegClass; 11973 else if (PPC::VRRCRegClass.contains(*I)) 11974 RC = &PPC::VRRCRegClass; 11975 else 11976 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 11977 11978 unsigned NewVR = MRI->createVirtualRegister(RC); 11979 // Create copy from CSR to a virtual register. 11980 // FIXME: this currently does not emit CFI pseudo-instructions, it works 11981 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be 11982 // nounwind. If we want to generalize this later, we may need to emit 11983 // CFI pseudo-instructions. 11984 assert(Entry->getParent()->getFunction()->hasFnAttribute( 11985 Attribute::NoUnwind) && 11986 "Function should be nounwind in insertCopiesSplitCSR!"); 11987 Entry->addLiveIn(*I); 11988 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) 11989 .addReg(*I); 11990 11991 // Insert the copy-back instructions right before the terminator 11992 for (auto *Exit : Exits) 11993 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), 11994 TII->get(TargetOpcode::COPY), *I) 11995 .addReg(NewVR); 11996 } 11997 } 11998