//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCCCState.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <list>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
    addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  }

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load / store of condition registers
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ?
                       Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }
    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary. Everything
  // else is passed on an 8-byte boundary on PPC64 and a 4-byte boundary on
  // PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ?
                                32 : 16);
  return Align;
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
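/// (Illustration only, derived from the checks in the function body below: on
/// a big-endian subtarget with two distinct inputs, ShuffleKind 0, the
/// expected v16i8 mask is <1,3,5,...,31>, i.e. the odd bytes of the
/// concatenated inputs.)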
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
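/// (Illustration only, derived from the checks in the function body below: on
/// a big-endian subtarget with two distinct inputs, ShuffleKind 0, the
/// expected v16i8 mask is <4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31>,
/// i.e. the low-order word of each doubleword of the concatenated inputs.)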
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
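/// (Illustration only, derived from isVMerge above: a big-endian vmrglb merge
/// of two distinct inputs, ShuffleKind 0 with UnitSize 1, expects the v16i8
/// mask <8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31>.)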
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * \brief Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements of size
 * 8. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31.
In this case, the RHSStart value passed should
1361 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1362 * to 31 specify elements in the second vector).
1363 *
1364 * \param[in] N The shuffle vector SD Node to analyze
1365 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1366 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1367 * vector to the shuffle_vector instruction
1368 * \return true iff this shuffle vector represents an even or odd word merge
1369 */
1370 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1371 unsigned RHSStartValue) {
1372 if (N->getValueType(0) != MVT::v16i8)
1373 return false;
1374
1375 for (unsigned i = 0; i < 2; ++i)
1376 for (unsigned j = 0; j < 4; ++j)
1377 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1378 i*RHSStartValue+j+IndexOffset) ||
1379 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1380 i*RHSStartValue+j+IndexOffset+8))
1381 return false;
1382 return true;
1383 }
1384
1385 /**
1386 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
1387 * vmrgow instructions.
1388 *
1389 * \param[in] N The shuffle vector SD Node to analyze
1390 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1391 * \param[in] ShuffleKind Identify the type of merge:
1392 * - 0 = big-endian merge with two different inputs;
1393 * - 1 = either-endian merge with two identical inputs;
1394 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1395 * little-endian merges).
1396 * \param[in] DAG The current SelectionDAG
1397 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow instructions
1398 */
1399 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1400 unsigned ShuffleKind, SelectionDAG &DAG) {
1401 if (DAG.getDataLayout().isLittleEndian()) {
1402 unsigned indexOffset = CheckEven ? 4 : 0;
1403 if (ShuffleKind == 1) // Unary
1404 return isVMerge(N, indexOffset, 0);
1405 else if (ShuffleKind == 2) // swapped
1406 return isVMerge(N, indexOffset, 16);
1407 else
1408 return false;
1409 }
1410 else {
1411 unsigned indexOffset = CheckEven ? 0 : 4;
1412 if (ShuffleKind == 1) // Unary
1413 return isVMerge(N, indexOffset, 0);
1414 else if (ShuffleKind == 0) // Normal
1415 return isVMerge(N, indexOffset, 16);
1416 else
1417 return false;
1418 }
1419 return false;
1420 }
1421
1422 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1423 /// amount, otherwise return -1.
1424 /// The ShuffleKind distinguishes between big-endian operations with two
1425 /// different inputs (0), either-endian operations with two identical inputs
1426 /// (1), and little-endian operations with two different inputs (2). For the
1427 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1428 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1429 SelectionDAG &DAG) {
1430 if (N->getValueType(0) != MVT::v16i8)
1431 return -1;
1432
1433 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1434
1435 // Find the first non-undef value in the shuffle mask.
1436 unsigned i;
1437 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1438 /*search*/;
1439
1440 if (i == 16) return -1; // all undef.
1441
1442 // Otherwise, check to see if the rest of the elements are consecutively
1443 // numbered from this value.
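// For illustration: with ShuffleKind 0 on a big-endian target, the mask
// <3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18> is accepted and yields a shift
// amount of 3; undef elements are tolerated anywhere as long as the defined
// ones stay consecutive.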
1444 unsigned ShiftAmt = SVOp->getMaskElt(i); 1445 if (ShiftAmt < i) return -1; 1446 1447 ShiftAmt -= i; 1448 bool isLE = DAG.getDataLayout().isLittleEndian(); 1449 1450 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { 1451 // Check the rest of the elements to see if they are consecutive. 1452 for (++i; i != 16; ++i) 1453 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1454 return -1; 1455 } else if (ShuffleKind == 1) { 1456 // Check the rest of the elements to see if they are consecutive. 1457 for (++i; i != 16; ++i) 1458 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 1459 return -1; 1460 } else 1461 return -1; 1462 1463 if (isLE) 1464 ShiftAmt = 16 - ShiftAmt; 1465 1466 return ShiftAmt; 1467 } 1468 1469 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 1470 /// specifies a splat of a single element that is suitable for input to 1471 /// VSPLTB/VSPLTH/VSPLTW. 1472 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 1473 assert(N->getValueType(0) == MVT::v16i8 && 1474 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 1475 1476 // The consecutive indices need to specify an element, not part of two 1477 // different elements. So abandon ship early if this isn't the case. 1478 if (N->getMaskElt(0) % EltSize != 0) 1479 return false; 1480 1481 // This is a splat operation if each element of the permute is the same, and 1482 // if the value doesn't reference the second vector. 1483 unsigned ElementBase = N->getMaskElt(0); 1484 1485 // FIXME: Handle UNDEF elements too! 1486 if (ElementBase >= 16) 1487 return false; 1488 1489 // Check that the indices are consecutive, in the case of a multi-byte element 1490 // splatted with a v16i8 mask. 1491 for (unsigned i = 1; i != EltSize; ++i) 1492 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 1493 return false; 1494 1495 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 1496 if (N->getMaskElt(i) < 0) continue; 1497 for (unsigned j = 0; j != EltSize; ++j) 1498 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 1499 return false; 1500 } 1501 return true; 1502 } 1503 1504 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, 1505 unsigned &InsertAtByte, bool &Swap, bool IsLE) { 1506 1507 // Check that the mask is shuffling words 1508 for (unsigned i = 0; i < 4; ++i) { 1509 unsigned B0 = N->getMaskElt(i*4); 1510 unsigned B1 = N->getMaskElt(i*4+1); 1511 unsigned B2 = N->getMaskElt(i*4+2); 1512 unsigned B3 = N->getMaskElt(i*4+3); 1513 if (B0 % 4) 1514 return false; 1515 if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) 1516 return false; 1517 } 1518 1519 // Now we look at mask elements 0,4,8,12 1520 unsigned M0 = N->getMaskElt(0) / 4; 1521 unsigned M1 = N->getMaskElt(4) / 4; 1522 unsigned M2 = N->getMaskElt(8) / 4; 1523 unsigned M3 = N->getMaskElt(12) / 4; 1524 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; 1525 unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; 1526 1527 // Below, let H and L be arbitrary elements of the shuffle mask 1528 // where H is in the range [4,7] and L is in the range [0,3]. 1529 // H, 1, 2, 3 or L, 5, 6, 7 1530 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || 1531 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { 1532 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; 1533 InsertAtByte = IsLE ? 
12 : 0; 1534 Swap = M0 < 4; 1535 return true; 1536 } 1537 // 0, H, 2, 3 or 4, L, 6, 7 1538 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || 1539 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { 1540 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; 1541 InsertAtByte = IsLE ? 8 : 4; 1542 Swap = M1 < 4; 1543 return true; 1544 } 1545 // 0, 1, H, 3 or 4, 5, L, 7 1546 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || 1547 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { 1548 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; 1549 InsertAtByte = IsLE ? 4 : 8; 1550 Swap = M2 < 4; 1551 return true; 1552 } 1553 // 0, 1, 2, H or 4, 5, 6, L 1554 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || 1555 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { 1556 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; 1557 InsertAtByte = IsLE ? 0 : 12; 1558 Swap = M3 < 4; 1559 return true; 1560 } 1561 1562 // If both vector operands for the shuffle are the same vector, the mask will 1563 // contain only elements from the first one and the second one will be undef. 1564 if (N->getOperand(1).isUndef()) { 1565 ShiftElts = 0; 1566 Swap = true; 1567 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; 1568 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { 1569 InsertAtByte = IsLE ? 12 : 0; 1570 return true; 1571 } 1572 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { 1573 InsertAtByte = IsLE ? 8 : 4; 1574 return true; 1575 } 1576 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { 1577 InsertAtByte = IsLE ? 4 : 8; 1578 return true; 1579 } 1580 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { 1581 InsertAtByte = IsLE ? 0 : 12; 1582 return true; 1583 } 1584 } 1585 1586 return false; 1587 } 1588 1589 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 1590 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 1591 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, 1592 SelectionDAG &DAG) { 1593 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1594 assert(isSplatShuffleMask(SVOp, EltSize)); 1595 if (DAG.getDataLayout().isLittleEndian()) 1596 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); 1597 else 1598 return SVOp->getMaskElt(0) / EltSize; 1599 } 1600 1601 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 1602 /// by using a vspltis[bhw] instruction of the specified element size, return 1603 /// the constant being splatted. The ByteSize field indicates the number of 1604 /// bytes of each element [124] -> [bhw]. 1605 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 1606 SDValue OpVal(nullptr, 0); 1607 1608 // If ByteSize of the splat is bigger than the element size of the 1609 // build_vector, then we have a case where we are checking for a splat where 1610 // multiple elements of the buildvector are folded together into a single 1611 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 1612 unsigned EltSize = 16/N->getNumOperands(); 1613 if (EltSize < ByteSize) { 1614 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 1615 SDValue UniquedVals[4]; 1616 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 1617 1618 // See if all of the elements in the buildvector agree across. 1619 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1620 if (N->getOperand(i).isUndef()) continue; 1621 // If the element isn't a constant, bail fully out. 
1622 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 1623 1624 1625 if (!UniquedVals[i&(Multiple-1)].getNode()) 1626 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 1627 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 1628 return SDValue(); // no match. 1629 } 1630 1631 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 1632 // either constant or undef values that are identical for each chunk. See 1633 // if these chunks can form into a larger vspltis*. 1634 1635 // Check to see if all of the leading entries are either 0 or -1. If 1636 // neither, then this won't fit into the immediate field. 1637 bool LeadingZero = true; 1638 bool LeadingOnes = true; 1639 for (unsigned i = 0; i != Multiple-1; ++i) { 1640 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 1641 1642 LeadingZero &= isNullConstant(UniquedVals[i]); 1643 LeadingOnes &= isAllOnesConstant(UniquedVals[i]); 1644 } 1645 // Finally, check the least significant entry. 1646 if (LeadingZero) { 1647 if (!UniquedVals[Multiple-1].getNode()) 1648 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef 1649 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 1650 if (Val < 16) // 0,0,0,4 -> vspltisw(4) 1651 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1652 } 1653 if (LeadingOnes) { 1654 if (!UniquedVals[Multiple-1].getNode()) 1655 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef 1656 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 1657 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 1658 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1659 } 1660 1661 return SDValue(); 1662 } 1663 1664 // Check to see if this buildvec has a single non-undef value in its elements. 1665 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1666 if (N->getOperand(i).isUndef()) continue; 1667 if (!OpVal.getNode()) 1668 OpVal = N->getOperand(i); 1669 else if (OpVal != N->getOperand(i)) 1670 return SDValue(); 1671 } 1672 1673 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 1674 1675 unsigned ValSizeInBytes = EltSize; 1676 uint64_t Value = 0; 1677 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1678 Value = CN->getZExtValue(); 1679 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1680 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 1681 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 1682 } 1683 1684 // If the splat value is larger than the element value, then we can never do 1685 // this splat. The only case that we could fit the replicated bits into our 1686 // immediate field for would be zero, and we prefer to use vxor for it. 1687 if (ValSizeInBytes < ByteSize) return SDValue(); 1688 1689 // If the element value is larger than the splat value, check if it consists 1690 // of a repeated bit pattern of size ByteSize. 1691 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) 1692 return SDValue(); 1693 1694 // Properly sign extend the value. 1695 int MaskVal = SignExtend32(Value, ByteSize * 8); 1696 1697 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 
1698 if (MaskVal == 0) return SDValue(); 1699 1700 // Finally, if this value fits in a 5 bit sext field, return it 1701 if (SignExtend32<5>(MaskVal) == MaskVal) 1702 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); 1703 return SDValue(); 1704 } 1705 1706 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift 1707 /// amount, otherwise return -1. 1708 int PPC::isQVALIGNIShuffleMask(SDNode *N) { 1709 EVT VT = N->getValueType(0); 1710 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) 1711 return -1; 1712 1713 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1714 1715 // Find the first non-undef value in the shuffle mask. 1716 unsigned i; 1717 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) 1718 /*search*/; 1719 1720 if (i == 4) return -1; // all undef. 1721 1722 // Otherwise, check to see if the rest of the elements are consecutively 1723 // numbered from this value. 1724 unsigned ShiftAmt = SVOp->getMaskElt(i); 1725 if (ShiftAmt < i) return -1; 1726 ShiftAmt -= i; 1727 1728 // Check the rest of the elements to see if they are consecutive. 1729 for (++i; i != 4; ++i) 1730 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1731 return -1; 1732 1733 return ShiftAmt; 1734 } 1735 1736 //===----------------------------------------------------------------------===// 1737 // Addressing Mode Selection 1738 //===----------------------------------------------------------------------===// 1739 1740 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit 1741 /// or 64-bit immediate, and if the value can be accurately represented as a 1742 /// sign extension from a 16-bit value. If so, this returns true and the 1743 /// immediate. 1744 static bool isIntS16Immediate(SDNode *N, short &Imm) { 1745 if (!isa<ConstantSDNode>(N)) 1746 return false; 1747 1748 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 1749 if (N->getValueType(0) == MVT::i32) 1750 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 1751 else 1752 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 1753 } 1754 static bool isIntS16Immediate(SDValue Op, short &Imm) { 1755 return isIntS16Immediate(Op.getNode(), Imm); 1756 } 1757 1758 /// SelectAddressRegReg - Given the specified addressed, check to see if it 1759 /// can be represented as an indexed [r+r] operation. Returns false if it 1760 /// can be more efficiently represented with [r+imm]. 1761 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 1762 SDValue &Index, 1763 SelectionDAG &DAG) const { 1764 short imm = 0; 1765 if (N.getOpcode() == ISD::ADD) { 1766 if (isIntS16Immediate(N.getOperand(1), imm)) 1767 return false; // r+i 1768 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 1769 return false; // r+i 1770 1771 Base = N.getOperand(0); 1772 Index = N.getOperand(1); 1773 return true; 1774 } else if (N.getOpcode() == ISD::OR) { 1775 if (isIntS16Immediate(N.getOperand(1), imm)) 1776 return false; // r+i can fold it if we can. 1777 1778 // If this is an or of disjoint bitfields, we can codegen this as an add 1779 // (for better address arithmetic) if the LHS and RHS of the OR are provably 1780 // disjoint. 
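// For illustration, an address computed as (or (shl %x, 16), (and %y, 0xFFFF))
// can be selected as [r+r]: every bit is known to be zero in at least one of
// the two operands, so the OR behaves exactly like an ADD and cannot carry.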
1781 APInt LHSKnownZero, LHSKnownOne;
1782 APInt RHSKnownZero, RHSKnownOne;
1783 DAG.computeKnownBits(N.getOperand(0),
1784 LHSKnownZero, LHSKnownOne);
1785
1786 if (LHSKnownZero.getBoolValue()) {
1787 DAG.computeKnownBits(N.getOperand(1),
1788 RHSKnownZero, RHSKnownOne);
1789 // If all of the bits are known zero on the LHS or RHS, the add won't
1790 // carry.
1791 if (~(LHSKnownZero | RHSKnownZero) == 0) {
1792 Base = N.getOperand(0);
1793 Index = N.getOperand(1);
1794 return true;
1795 }
1796 }
1797 }
1798
1799 return false;
1800 }
1801
1802 // If we happen to be doing an i64 load or store into a stack slot that has
1803 // less than a 4-byte alignment, then the frame-index elimination may need to
1804 // use an indexed load or store instruction (because the offset may not be a
1805 // multiple of 4). The extra register needed to hold the offset comes from the
1806 // register scavenger, and it is possible that the scavenger will need to use
1807 // an emergency spill slot. As a result, we need to make sure that a spill slot
1808 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
1809 // stack slot.
1810 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
1811 // FIXME: This does not handle the LWA case.
1812 if (VT != MVT::i64)
1813 return;
1814
1815 // NOTE: We'll exclude negative FIs here, which come from argument
1816 // lowering, because there are no known test cases triggering this problem
1817 // using packed structures (or similar). We can remove this exclusion if
1818 // we find such a test case. The reason why this is so test-case driven is
1819 // because this entire 'fixup' is only to prevent crashes (from the
1820 // register scavenger) on not-really-valid inputs. For example, if we have:
1821 // %a = alloca i1
1822 // %b = bitcast i1* %a to i64*
1823 // store i64 0, i64* %b
1824 // then the store should really be marked as 'align 1', but is not. If it
1825 // were marked as 'align 1' then the indexed form would have been
1826 // instruction-selected initially, and the problem this 'fixup' is preventing
1827 // won't happen regardless.
1828 if (FrameIdx < 0)
1829 return;
1830
1831 MachineFunction &MF = DAG.getMachineFunction();
1832 MachineFrameInfo *MFI = MF.getFrameInfo();
1833
1834 unsigned Align = MFI->getObjectAlignment(FrameIdx);
1835 if (Align >= 4)
1836 return;
1837
1838 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1839 FuncInfo->setHasNonRISpills();
1840 }
1841
1842 /// Returns true if the address N can be represented by a base register plus
1843 /// a signed 16-bit displacement [r+imm], and if it is not better
1844 /// represented as reg+reg. If Aligned is true, only accept displacements
1845 /// suitable for STD and friends, i.e. multiples of 4.
1846 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
1847 SDValue &Base,
1848 SelectionDAG &DAG,
1849 bool Aligned) const {
1850 // FIXME dl should come from parent load or store, not from address
1851 SDLoc dl(N);
1852 // If this can be more profitably realized as r+r, fail.
1853 if (SelectAddressRegReg(N, Disp, Base, DAG)) 1854 return false; 1855 1856 if (N.getOpcode() == ISD::ADD) { 1857 short imm = 0; 1858 if (isIntS16Immediate(N.getOperand(1), imm) && 1859 (!Aligned || (imm & 3) == 0)) { 1860 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1861 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1862 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1863 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1864 } else { 1865 Base = N.getOperand(0); 1866 } 1867 return true; // [r+i] 1868 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 1869 // Match LOAD (ADD (X, Lo(G))). 1870 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 1871 && "Cannot handle constant offsets yet!"); 1872 Disp = N.getOperand(1).getOperand(0); // The global address. 1873 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 1874 Disp.getOpcode() == ISD::TargetGlobalTLSAddress || 1875 Disp.getOpcode() == ISD::TargetConstantPool || 1876 Disp.getOpcode() == ISD::TargetJumpTable); 1877 Base = N.getOperand(0); 1878 return true; // [&g+r] 1879 } 1880 } else if (N.getOpcode() == ISD::OR) { 1881 short imm = 0; 1882 if (isIntS16Immediate(N.getOperand(1), imm) && 1883 (!Aligned || (imm & 3) == 0)) { 1884 // If this is an or of disjoint bitfields, we can codegen this as an add 1885 // (for better address arithmetic) if the LHS and RHS of the OR are 1886 // provably disjoint. 1887 APInt LHSKnownZero, LHSKnownOne; 1888 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 1889 1890 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 1891 // If all of the bits are known zero on the LHS or RHS, the add won't 1892 // carry. 1893 if (FrameIndexSDNode *FI = 1894 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1895 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1896 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1897 } else { 1898 Base = N.getOperand(0); 1899 } 1900 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1901 return true; 1902 } 1903 } 1904 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1905 // Loading from a constant address. 1906 1907 // If this address fits entirely in a 16-bit sext immediate field, codegen 1908 // this as "d, 0" 1909 short Imm; 1910 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { 1911 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); 1912 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1913 CN->getValueType(0)); 1914 return true; 1915 } 1916 1917 // Handle 32-bit sext immediates with LIS + addr mode. 1918 if ((CN->getValueType(0) == MVT::i32 || 1919 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && 1920 (!Aligned || (CN->getZExtValue() & 3) == 0)) { 1921 int Addr = (int)CN->getZExtValue(); 1922 1923 // Otherwise, break this down into an LIS + disp. 1924 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); 1925 1926 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, 1927 MVT::i32); 1928 unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; 1929 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); 1930 return true; 1931 } 1932 } 1933 1934 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); 1935 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { 1936 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1937 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1938 } else 1939 Base = N; 1940 return true; // [r+0] 1941 } 1942 1943 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be 1944 /// represented as an indexed [r+r] operation. 1945 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 1946 SDValue &Index, 1947 SelectionDAG &DAG) const { 1948 // Check to see if we can easily represent this as an [r+r] address. This 1949 // will fail if it thinks that the address is more profitably represented as 1950 // reg+imm, e.g. where imm = 0. 1951 if (SelectAddressRegReg(N, Base, Index, DAG)) 1952 return true; 1953 1954 // If the operand is an addition, always emit this as [r+r], since this is 1955 // better (for code size, and execution, as the memop does the add for free) 1956 // than emitting an explicit add. 1957 if (N.getOpcode() == ISD::ADD) { 1958 Base = N.getOperand(0); 1959 Index = N.getOperand(1); 1960 return true; 1961 } 1962 1963 // Otherwise, do it the hard way, using R0 as the base register. 1964 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1965 N.getValueType()); 1966 Index = N; 1967 return true; 1968 } 1969 1970 /// getPreIndexedAddressParts - returns true by value, base pointer and 1971 /// offset pointer and addressing mode by reference if the node's address 1972 /// can be legally represented as pre-indexed load / store address. 1973 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 1974 SDValue &Offset, 1975 ISD::MemIndexedMode &AM, 1976 SelectionDAG &DAG) const { 1977 if (DisablePPCPreinc) return false; 1978 1979 bool isLoad = true; 1980 SDValue Ptr; 1981 EVT VT; 1982 unsigned Alignment; 1983 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1984 Ptr = LD->getBasePtr(); 1985 VT = LD->getMemoryVT(); 1986 Alignment = LD->getAlignment(); 1987 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 1988 Ptr = ST->getBasePtr(); 1989 VT = ST->getMemoryVT(); 1990 Alignment = ST->getAlignment(); 1991 isLoad = false; 1992 } else 1993 return false; 1994 1995 // PowerPC doesn't have preinc load/store instructions for vectors (except 1996 // for QPX, which does have preinc r+r forms). 1997 if (VT.isVector()) { 1998 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { 1999 return false; 2000 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { 2001 AM = ISD::PRE_INC; 2002 return true; 2003 } 2004 } 2005 2006 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { 2007 2008 // Common code will reject creating a pre-inc form if the base pointer 2009 // is a frame index, or if N is a store and the base pointer is either 2010 // the same as or a predecessor of the value being stored. Check for 2011 // those situations here, and try with swapped Base/Offset instead. 
2012 bool Swap = false; 2013 2014 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) 2015 Swap = true; 2016 else if (!isLoad) { 2017 SDValue Val = cast<StoreSDNode>(N)->getValue(); 2018 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) 2019 Swap = true; 2020 } 2021 2022 if (Swap) 2023 std::swap(Base, Offset); 2024 2025 AM = ISD::PRE_INC; 2026 return true; 2027 } 2028 2029 // LDU/STU can only handle immediates that are a multiple of 4. 2030 if (VT != MVT::i64) { 2031 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) 2032 return false; 2033 } else { 2034 // LDU/STU need an address with at least 4-byte alignment. 2035 if (Alignment < 4) 2036 return false; 2037 2038 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) 2039 return false; 2040 } 2041 2042 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 2043 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 2044 // sext i32 to i64 when addr mode is r+i. 2045 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 2046 LD->getExtensionType() == ISD::SEXTLOAD && 2047 isa<ConstantSDNode>(Offset)) 2048 return false; 2049 } 2050 2051 AM = ISD::PRE_INC; 2052 return true; 2053 } 2054 2055 //===----------------------------------------------------------------------===// 2056 // LowerOperation implementation 2057 //===----------------------------------------------------------------------===// 2058 2059 /// Return true if we should reference labels using a PICBase, set the HiOpFlags 2060 /// and LoOpFlags to the target MO flags. 2061 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, 2062 unsigned &HiOpFlags, unsigned &LoOpFlags, 2063 const GlobalValue *GV = nullptr) { 2064 HiOpFlags = PPCII::MO_HA; 2065 LoOpFlags = PPCII::MO_LO; 2066 2067 // Don't use the pic base if not in PIC relocation model. 2068 if (IsPIC) { 2069 HiOpFlags |= PPCII::MO_PIC_FLAG; 2070 LoOpFlags |= PPCII::MO_PIC_FLAG; 2071 } 2072 2073 // If this is a reference to a global value that requires a non-lazy-ptr, make 2074 // sure that instruction lowering adds it. 2075 if (GV && Subtarget.hasLazyResolverStub(GV)) { 2076 HiOpFlags |= PPCII::MO_NLP_FLAG; 2077 LoOpFlags |= PPCII::MO_NLP_FLAG; 2078 2079 if (GV->hasHiddenVisibility()) { 2080 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2081 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2082 } 2083 } 2084 } 2085 2086 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 2087 SelectionDAG &DAG) { 2088 SDLoc DL(HiPart); 2089 EVT PtrVT = HiPart.getValueType(); 2090 SDValue Zero = DAG.getConstant(0, DL, PtrVT); 2091 2092 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 2093 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 2094 2095 // With PIC, the first instruction is actually "GR+hi(&G)". 2096 if (isPIC) 2097 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 2098 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 2099 2100 // Generate non-pic code that has direct accesses to the constant pool. 2101 // The address of the global is just (hi(&g)+lo(&g)). 2102 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 2103 } 2104 2105 static void setUsesTOCBasePtr(MachineFunction &MF) { 2106 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2107 FuncInfo->setUsesTOCBasePtr(); 2108 } 2109 2110 static void setUsesTOCBasePtr(SelectionDAG &DAG) { 2111 setUsesTOCBasePtr(DAG.getMachineFunction()); 2112 } 2113 2114 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, 2115 SDValue GA) { 2116 EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; 2117 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : 2118 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); 2119 2120 SDValue Ops[] = { GA, Reg }; 2121 return DAG.getMemIntrinsicNode( 2122 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, 2123 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, 2124 false, 0); 2125 } 2126 2127 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 2128 SelectionDAG &DAG) const { 2129 EVT PtrVT = Op.getValueType(); 2130 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2131 const Constant *C = CP->getConstVal(); 2132 2133 // 64-bit SVR4 ABI code is always position-independent. 2134 // The actual address of the GlobalValue is stored in the TOC. 2135 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2136 setUsesTOCBasePtr(DAG); 2137 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 2138 return getTOCEntry(DAG, SDLoc(CP), true, GA); 2139 } 2140 2141 unsigned MOHiFlag, MOLoFlag; 2142 bool IsPIC = isPositionIndependent(); 2143 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2144 2145 if (IsPIC && Subtarget.isSVR4ABI()) { 2146 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 2147 PPCII::MO_PIC_FLAG); 2148 return getTOCEntry(DAG, SDLoc(CP), false, GA); 2149 } 2150 2151 SDValue CPIHi = 2152 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 2153 SDValue CPILo = 2154 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 2155 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); 2156 } 2157 2158 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 2159 EVT PtrVT = Op.getValueType(); 2160 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2161 2162 // 64-bit SVR4 ABI code is always position-independent. 2163 // The actual address of the GlobalValue is stored in the TOC. 2164 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2165 setUsesTOCBasePtr(DAG); 2166 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 2167 return getTOCEntry(DAG, SDLoc(JT), true, GA); 2168 } 2169 2170 unsigned MOHiFlag, MOLoFlag; 2171 bool IsPIC = isPositionIndependent(); 2172 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2173 2174 if (IsPIC && Subtarget.isSVR4ABI()) { 2175 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 2176 PPCII::MO_PIC_FLAG); 2177 return getTOCEntry(DAG, SDLoc(GA), false, GA); 2178 } 2179 2180 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 2181 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 2182 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); 2183 } 2184 2185 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 2186 SelectionDAG &DAG) const { 2187 EVT PtrVT = Op.getValueType(); 2188 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); 2189 const BlockAddress *BA = BASDN->getBlockAddress(); 2190 2191 // 64-bit SVR4 ABI code is always position-independent. 2192 // The actual BlockAddress is stored in the TOC. 
2193 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2194 setUsesTOCBasePtr(DAG); 2195 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); 2196 return getTOCEntry(DAG, SDLoc(BASDN), true, GA); 2197 } 2198 2199 unsigned MOHiFlag, MOLoFlag; 2200 bool IsPIC = isPositionIndependent(); 2201 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2202 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 2203 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 2204 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); 2205 } 2206 2207 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 2208 SelectionDAG &DAG) const { 2209 2210 // FIXME: TLS addresses currently use medium model code sequences, 2211 // which is the most useful form. Eventually support for small and 2212 // large models could be added if users need it, at the cost of 2213 // additional complexity. 2214 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2215 if (DAG.getTarget().Options.EmulatedTLS) 2216 return LowerToTLSEmulatedModel(GA, DAG); 2217 2218 SDLoc dl(GA); 2219 const GlobalValue *GV = GA->getGlobal(); 2220 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2221 bool is64bit = Subtarget.isPPC64(); 2222 const Module *M = DAG.getMachineFunction().getFunction()->getParent(); 2223 PICLevel::Level picLevel = M->getPICLevel(); 2224 2225 TLSModel::Model Model = getTargetMachine().getTLSModel(GV); 2226 2227 if (Model == TLSModel::LocalExec) { 2228 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2229 PPCII::MO_TPREL_HA); 2230 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2231 PPCII::MO_TPREL_LO); 2232 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 2233 is64bit ? MVT::i64 : MVT::i32); 2234 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 2235 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 2236 } 2237 2238 if (Model == TLSModel::InitialExec) { 2239 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2240 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2241 PPCII::MO_TLS); 2242 SDValue GOTPtr; 2243 if (is64bit) { 2244 setUsesTOCBasePtr(DAG); 2245 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2246 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 2247 PtrVT, GOTReg, TGA); 2248 } else 2249 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); 2250 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 2251 PtrVT, TGA, GOTPtr); 2252 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 2253 } 2254 2255 if (Model == TLSModel::GeneralDynamic) { 2256 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2257 SDValue GOTPtr; 2258 if (is64bit) { 2259 setUsesTOCBasePtr(DAG); 2260 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2261 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 2262 GOTReg, TGA); 2263 } else { 2264 if (picLevel == PICLevel::SmallPIC) 2265 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2266 else 2267 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2268 } 2269 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, 2270 GOTPtr, TGA, TGA); 2271 } 2272 2273 if (Model == TLSModel::LocalDynamic) { 2274 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2275 SDValue GOTPtr; 2276 if (is64bit) { 2277 setUsesTOCBasePtr(DAG); 2278 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2279 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 2280 GOTReg, TGA); 2281 } else { 2282 if (picLevel == 
PICLevel::SmallPIC) 2283 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2284 else 2285 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2286 } 2287 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, 2288 PtrVT, GOTPtr, TGA, TGA); 2289 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, 2290 PtrVT, TLSAddr, TGA); 2291 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 2292 } 2293 2294 llvm_unreachable("Unknown TLS model!"); 2295 } 2296 2297 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 2298 SelectionDAG &DAG) const { 2299 EVT PtrVT = Op.getValueType(); 2300 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 2301 SDLoc DL(GSDN); 2302 const GlobalValue *GV = GSDN->getGlobal(); 2303 2304 // 64-bit SVR4 ABI code is always position-independent. 2305 // The actual address of the GlobalValue is stored in the TOC. 2306 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2307 setUsesTOCBasePtr(DAG); 2308 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 2309 return getTOCEntry(DAG, DL, true, GA); 2310 } 2311 2312 unsigned MOHiFlag, MOLoFlag; 2313 bool IsPIC = isPositionIndependent(); 2314 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); 2315 2316 if (IsPIC && Subtarget.isSVR4ABI()) { 2317 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 2318 GSDN->getOffset(), 2319 PPCII::MO_PIC_FLAG); 2320 return getTOCEntry(DAG, DL, false, GA); 2321 } 2322 2323 SDValue GAHi = 2324 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 2325 SDValue GALo = 2326 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 2327 2328 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); 2329 2330 // If the global reference is actually to a non-lazy-pointer, we have to do an 2331 // extra load to get the address of the global. 2332 if (MOHiFlag & PPCII::MO_NLP_FLAG) 2333 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 2334 return Ptr; 2335 } 2336 2337 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 2338 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2339 SDLoc dl(Op); 2340 2341 if (Op.getValueType() == MVT::v2i64) { 2342 // When the operands themselves are v2i64 values, we need to do something 2343 // special because VSX has no underlying comparison operations for these. 2344 if (Op.getOperand(0).getValueType() == MVT::v2i64) { 2345 // Equality can be handled by casting to the legal type for Altivec 2346 // comparisons, everything else needs to be expanded. 2347 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2348 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, 2349 DAG.getSetCC(dl, MVT::v4i32, 2350 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), 2351 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), 2352 CC)); 2353 } 2354 2355 return SDValue(); 2356 } 2357 2358 // We handle most of these in the usual way. 2359 return Op; 2360 } 2361 2362 // If we're comparing for equality to zero, expose the fact that this is 2363 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can 2364 // fold the new nodes. 
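// For an i32 operand this becomes (srl (ctlz %x), 5): ctlz produces 32 only
// when %x is zero, and 32 >> 5 == 1, so the result is 1 iff %x == 0.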
2365 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 2366 if (C->isNullValue() && CC == ISD::SETEQ) { 2367 EVT VT = Op.getOperand(0).getValueType(); 2368 SDValue Zext = Op.getOperand(0); 2369 if (VT.bitsLT(MVT::i32)) { 2370 VT = MVT::i32; 2371 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); 2372 } 2373 unsigned Log2b = Log2_32(VT.getSizeInBits()); 2374 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); 2375 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, 2376 DAG.getConstant(Log2b, dl, MVT::i32)); 2377 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); 2378 } 2379 // Leave comparisons against 0 and -1 alone for now, since they're usually 2380 // optimized. FIXME: revisit this when we can custom lower all setcc 2381 // optimizations. 2382 if (C->isAllOnesValue() || C->isNullValue()) 2383 return SDValue(); 2384 } 2385 2386 // If we have an integer seteq/setne, turn it into a compare against zero 2387 // by xor'ing the rhs with the lhs, which is faster than setting a 2388 // condition register, reading it back out, and masking the correct bit. The 2389 // normal approach here uses sub to do this instead of xor. Using xor exposes 2390 // the result to other bit-twiddling opportunities. 2391 EVT LHSVT = Op.getOperand(0).getValueType(); 2392 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 2393 EVT VT = Op.getValueType(); 2394 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 2395 Op.getOperand(1)); 2396 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); 2397 } 2398 return SDValue(); 2399 } 2400 2401 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { 2402 SDNode *Node = Op.getNode(); 2403 EVT VT = Node->getValueType(0); 2404 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2405 SDValue InChain = Node->getOperand(0); 2406 SDValue VAListPtr = Node->getOperand(1); 2407 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2408 SDLoc dl(Node); 2409 2410 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 2411 2412 // gpr_index 2413 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2414 VAListPtr, MachinePointerInfo(SV), MVT::i8); 2415 InChain = GprIndex.getValue(1); 2416 2417 if (VT == MVT::i64) { 2418 // Check if GprIndex is even 2419 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 2420 DAG.getConstant(1, dl, MVT::i32)); 2421 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 2422 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); 2423 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 2424 DAG.getConstant(1, dl, MVT::i32)); 2425 // Align GprIndex to be even if it isn't 2426 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 2427 GprIndex); 2428 } 2429 2430 // fpr index is 1 byte after gpr 2431 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2432 DAG.getConstant(1, dl, MVT::i32)); 2433 2434 // fpr 2435 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2436 FprPtr, MachinePointerInfo(SV), MVT::i8); 2437 InChain = FprIndex.getValue(1); 2438 2439 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2440 DAG.getConstant(8, dl, MVT::i32)); 2441 2442 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2443 DAG.getConstant(4, dl, MVT::i32)); 2444 2445 // areas 2446 SDValue OverflowArea = 2447 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); 2448 InChain = OverflowArea.getValue(1); 2449 2450 
SDValue RegSaveArea = 2451 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); 2452 InChain = RegSaveArea.getValue(1); 2453 2454 // select overflow_area if index > 8 2455 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 2456 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); 2457 2458 // adjustment constant gpr_index * 4/8 2459 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 2460 VT.isInteger() ? GprIndex : FprIndex, 2461 DAG.getConstant(VT.isInteger() ? 4 : 8, dl, 2462 MVT::i32)); 2463 2464 // OurReg = RegSaveArea + RegConstant 2465 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 2466 RegConstant); 2467 2468 // Floating types are 32 bytes into RegSaveArea 2469 if (VT.isFloatingPoint()) 2470 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 2471 DAG.getConstant(32, dl, MVT::i32)); 2472 2473 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 2474 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 2475 VT.isInteger() ? GprIndex : FprIndex, 2476 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, 2477 MVT::i32)); 2478 2479 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 2480 VT.isInteger() ? VAListPtr : FprPtr, 2481 MachinePointerInfo(SV), MVT::i8); 2482 2483 // determine if we should load from reg_save_area or overflow_area 2484 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 2485 2486 // increase overflow_area by 4/8 if gpr/fpr > 8 2487 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 2488 DAG.getConstant(VT.isInteger() ? 4 : 8, 2489 dl, MVT::i32)); 2490 2491 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 2492 OverflowAreaPlusN); 2493 2494 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, 2495 MachinePointerInfo(), MVT::i32); 2496 2497 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); 2498 } 2499 2500 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { 2501 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 2502 2503 // We have to copy the entire va_list struct: 2504 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 2505 return DAG.getMemcpy(Op.getOperand(0), Op, 2506 Op.getOperand(1), Op.getOperand(2), 2507 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, 2508 false, MachinePointerInfo(), MachinePointerInfo()); 2509 } 2510 2511 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 2512 SelectionDAG &DAG) const { 2513 return Op.getOperand(0); 2514 } 2515 2516 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 2517 SelectionDAG &DAG) const { 2518 SDValue Chain = Op.getOperand(0); 2519 SDValue Trmp = Op.getOperand(1); // trampoline 2520 SDValue FPtr = Op.getOperand(2); // nested function 2521 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 2522 SDLoc dl(Op); 2523 2524 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2525 bool isPPC64 = (PtrVT == MVT::i64); 2526 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 2527 2528 TargetLowering::ArgListTy Args; 2529 TargetLowering::ArgListEntry Entry; 2530 2531 Entry.Ty = IntPtrTy; 2532 Entry.Node = Trmp; Args.push_back(Entry); 2533 2534 // TrampSize == (isPPC64 ? 48 : 40); 2535 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, 2536 isPPC64 ? 
MVT::i64 : MVT::i32); 2537 Args.push_back(Entry); 2538 2539 Entry.Node = FPtr; Args.push_back(Entry); 2540 Entry.Node = Nest; Args.push_back(Entry); 2541 2542 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 2543 TargetLowering::CallLoweringInfo CLI(DAG); 2544 CLI.setDebugLoc(dl).setChain(Chain) 2545 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), 2546 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 2547 std::move(Args)); 2548 2549 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2550 return CallResult.second; 2551 } 2552 2553 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { 2554 MachineFunction &MF = DAG.getMachineFunction(); 2555 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2556 EVT PtrVT = getPointerTy(MF.getDataLayout()); 2557 2558 SDLoc dl(Op); 2559 2560 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 2561 // vastart just stores the address of the VarArgsFrameIndex slot into the 2562 // memory location argument. 2563 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2564 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2565 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2566 MachinePointerInfo(SV)); 2567 } 2568 2569 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 2570 // We suppose the given va_list is already allocated. 2571 // 2572 // typedef struct { 2573 // char gpr; /* index into the array of 8 GPRs 2574 // * stored in the register save area 2575 // * gpr=0 corresponds to r3, 2576 // * gpr=1 to r4, etc. 2577 // */ 2578 // char fpr; /* index into the array of 8 FPRs 2579 // * stored in the register save area 2580 // * fpr=0 corresponds to f1, 2581 // * fpr=1 to f2, etc. 
2582 // */ 2583 // char *overflow_arg_area; 2584 // /* location on stack that holds 2585 // * the next overflow argument 2586 // */ 2587 // char *reg_save_area; 2588 // /* where r3:r10 and f1:f8 (if saved) 2589 // * are stored 2590 // */ 2591 // } va_list[1]; 2592 2593 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); 2594 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); 2595 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 2596 PtrVT); 2597 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2598 PtrVT); 2599 2600 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 2601 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); 2602 2603 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 2604 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); 2605 2606 uint64_t FPROffset = 1; 2607 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); 2608 2609 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2610 2611 // Store first byte : number of int regs 2612 SDValue firstStore = 2613 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), 2614 MachinePointerInfo(SV), MVT::i8); 2615 uint64_t nextOffset = FPROffset; 2616 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 2617 ConstFPROffset); 2618 2619 // Store second byte : number of float regs 2620 SDValue secondStore = 2621 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 2622 MachinePointerInfo(SV, nextOffset), MVT::i8); 2623 nextOffset += StackOffset; 2624 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 2625 2626 // Store second word : arguments given on stack 2627 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 2628 MachinePointerInfo(SV, nextOffset)); 2629 nextOffset += FrameOffset; 2630 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 2631 2632 // Store third word : arguments given in registers 2633 return DAG.getStore(thirdStore, dl, FR, nextPtr, 2634 MachinePointerInfo(SV, nextOffset)); 2635 } 2636 2637 #include "PPCGenCallingConv.inc" 2638 2639 // Function whose sole purpose is to kill compiler warnings 2640 // stemming from unused functions included from PPCGenCallingConv.inc. 2641 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { 2642 return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; 2643 } 2644 2645 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 2646 CCValAssign::LocInfo &LocInfo, 2647 ISD::ArgFlagsTy &ArgFlags, 2648 CCState &State) { 2649 return true; 2650 } 2651 2652 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 2653 MVT &LocVT, 2654 CCValAssign::LocInfo &LocInfo, 2655 ISD::ArgFlagsTy &ArgFlags, 2656 CCState &State) { 2657 static const MCPhysReg ArgRegs[] = { 2658 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2659 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2660 }; 2661 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2662 2663 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2664 2665 // Skip one register if the first unallocated register has an even register 2666 // number and there are still argument registers available which have not been 2667 // allocated yet. RegNum is actually an index into ArgRegs, which means we 2668 // need to skip a register if RegNum is odd. 
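// For example, if only R3 has been allocated so far (RegNum == 1), R4 is
// allocated here as padding, so that a 64-bit argument split across two GPRs
// lands in the aligned pair R5:R6 rather than straddling R4:R5.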
2669 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 2670 State.AllocateReg(ArgRegs[RegNum]); 2671 } 2672 2673 // Always return false here, as this function only makes sure that the first 2674 // unallocated register has an odd register number and does not actually 2675 // allocate a register for the current argument. 2676 return false; 2677 } 2678 2679 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 2680 MVT &LocVT, 2681 CCValAssign::LocInfo &LocInfo, 2682 ISD::ArgFlagsTy &ArgFlags, 2683 CCState &State) { 2684 static const MCPhysReg ArgRegs[] = { 2685 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2686 PPC::F8 2687 }; 2688 2689 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2690 2691 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2692 2693 // If there is only one Floating-point register left we need to put both f64 2694 // values of a split ppc_fp128 value on the stack. 2695 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 2696 State.AllocateReg(ArgRegs[RegNum]); 2697 } 2698 2699 // Always return false here, as this function only makes sure that the two f64 2700 // values a ppc_fp128 value is split into are both passed in registers or both 2701 // passed on the stack and does not actually allocate a register for the 2702 // current argument. 2703 return false; 2704 } 2705 2706 /// FPR - The set of FP registers that should be allocated for arguments, 2707 /// on Darwin. 2708 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, 2709 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, 2710 PPC::F11, PPC::F12, PPC::F13}; 2711 2712 /// QFPR - The set of QPX registers that should be allocated for arguments. 2713 static const MCPhysReg QFPR[] = { 2714 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, 2715 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; 2716 2717 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 2718 /// the stack. 2719 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2720 unsigned PtrByteSize) { 2721 unsigned ArgSize = ArgVT.getStoreSize(); 2722 if (Flags.isByVal()) 2723 ArgSize = Flags.getByValSize(); 2724 2725 // Round up to multiples of the pointer size, except for array members, 2726 // which are always packed. 2727 if (!Flags.isInConsecutiveRegs()) 2728 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2729 2730 return ArgSize; 2731 } 2732 2733 /// CalculateStackSlotAlignment - Calculates the alignment of this argument 2734 /// on the stack. 2735 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 2736 ISD::ArgFlagsTy Flags, 2737 unsigned PtrByteSize) { 2738 unsigned Align = PtrByteSize; 2739 2740 // Altivec parameters are padded to a 16 byte boundary. 2741 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2742 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2743 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2744 ArgVT == MVT::v1i128) 2745 Align = 16; 2746 // QPX vector types stored in double-precision are padded to a 32 byte 2747 // boundary. 2748 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) 2749 Align = 32; 2750 2751 // ByVal parameters are aligned as requested. 
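// For example, a byval aggregate that requests 16-byte alignment on a 64-bit
// target (PtrByteSize == 8) raises Align from 8 to 16 below.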
2752 if (Flags.isByVal()) { 2753 unsigned BVAlign = Flags.getByValAlign(); 2754 if (BVAlign > PtrByteSize) { 2755 if (BVAlign % PtrByteSize != 0) 2756 llvm_unreachable( 2757 "ByVal alignment is not a multiple of the pointer size"); 2758 2759 Align = BVAlign; 2760 } 2761 } 2762 2763 // Array members are always packed to their original alignment. 2764 if (Flags.isInConsecutiveRegs()) { 2765 // If the array member was split into multiple registers, the first 2766 // needs to be aligned to the size of the full type. (Except for 2767 // ppcf128, which is only aligned as its f64 components.) 2768 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 2769 Align = OrigVT.getStoreSize(); 2770 else 2771 Align = ArgVT.getStoreSize(); 2772 } 2773 2774 return Align; 2775 } 2776 2777 /// CalculateStackSlotUsed - Return whether this argument will use its 2778 /// stack slot (instead of being passed in registers). ArgOffset, 2779 /// AvailableFPRs, and AvailableVRs must hold the current argument 2780 /// position, and will be updated to account for this argument. 2781 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 2782 ISD::ArgFlagsTy Flags, 2783 unsigned PtrByteSize, 2784 unsigned LinkageSize, 2785 unsigned ParamAreaSize, 2786 unsigned &ArgOffset, 2787 unsigned &AvailableFPRs, 2788 unsigned &AvailableVRs, bool HasQPX) { 2789 bool UseMemory = false; 2790 2791 // Respect alignment of argument on the stack. 2792 unsigned Align = 2793 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 2794 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2795 // If there's no space left in the argument save area, we must 2796 // use memory (this check also catches zero-sized arguments). 2797 if (ArgOffset >= LinkageSize + ParamAreaSize) 2798 UseMemory = true; 2799 2800 // Allocate argument on the stack. 2801 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2802 if (Flags.isInConsecutiveRegsLast()) 2803 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2804 // If we overran the argument save area, we must use memory 2805 // (this check catches arguments passed partially in memory) 2806 if (ArgOffset > LinkageSize + ParamAreaSize) 2807 UseMemory = true; 2808 2809 // However, if the argument is actually passed in an FPR or a VR, 2810 // we don't use memory after all. 2811 if (!Flags.isByVal()) { 2812 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || 2813 // QPX registers overlap with the scalar FP registers. 2814 (HasQPX && (ArgVT == MVT::v4f32 || 2815 ArgVT == MVT::v4f64 || 2816 ArgVT == MVT::v4i1))) 2817 if (AvailableFPRs > 0) { 2818 --AvailableFPRs; 2819 return false; 2820 } 2821 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2822 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2823 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2824 ArgVT == MVT::v1i128) 2825 if (AvailableVRs > 0) { 2826 --AvailableVRs; 2827 return false; 2828 } 2829 } 2830 2831 return UseMemory; 2832 } 2833 2834 /// EnsureStackAlignment - Round stack frame size up from NumBytes to 2835 /// ensure minimum alignment required for target. 
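/// For example, with a 16-byte target stack alignment, NumBytes == 52 is
/// rounded up to 64 ((52 + 15) & ~15).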
2836 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, 2837 unsigned NumBytes) { 2838 unsigned TargetAlign = Lowering->getStackAlignment(); 2839 unsigned AlignMask = TargetAlign - 1; 2840 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2841 return NumBytes; 2842 } 2843 2844 SDValue PPCTargetLowering::LowerFormalArguments( 2845 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2846 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2847 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2848 if (Subtarget.isSVR4ABI()) { 2849 if (Subtarget.isPPC64()) 2850 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 2851 dl, DAG, InVals); 2852 else 2853 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 2854 dl, DAG, InVals); 2855 } else { 2856 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 2857 dl, DAG, InVals); 2858 } 2859 } 2860 2861 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( 2862 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2863 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2864 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2865 2866 // 32-bit SVR4 ABI Stack Frame Layout: 2867 // +-----------------------------------+ 2868 // +--> | Back chain | 2869 // | +-----------------------------------+ 2870 // | | Floating-point register save area | 2871 // | +-----------------------------------+ 2872 // | | General register save area | 2873 // | +-----------------------------------+ 2874 // | | CR save word | 2875 // | +-----------------------------------+ 2876 // | | VRSAVE save word | 2877 // | +-----------------------------------+ 2878 // | | Alignment padding | 2879 // | +-----------------------------------+ 2880 // | | Vector register save area | 2881 // | +-----------------------------------+ 2882 // | | Local variable space | 2883 // | +-----------------------------------+ 2884 // | | Parameter list area | 2885 // | +-----------------------------------+ 2886 // | | LR save word | 2887 // | +-----------------------------------+ 2888 // SP--> +--- | Back chain | 2889 // +-----------------------------------+ 2890 // 2891 // Specifications: 2892 // System V Application Binary Interface PowerPC Processor Supplement 2893 // AltiVec Technology Programming Interface Manual 2894 2895 MachineFunction &MF = DAG.getMachineFunction(); 2896 MachineFrameInfo *MFI = MF.getFrameInfo(); 2897 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2898 2899 EVT PtrVT = getPointerTy(MF.getDataLayout()); 2900 // Potential tail calls could cause overwriting of argument stack slots. 2901 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2902 (CallConv == CallingConv::Fast)); 2903 unsigned PtrByteSize = 4; 2904 2905 // Assign locations to all of the incoming arguments. 2906 SmallVector<CCValAssign, 16> ArgLocs; 2907 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 2908 *DAG.getContext()); 2909 2910 // Reserve space for the linkage area on the stack. 2911 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 2912 CCInfo.AllocateStack(LinkageSize, PtrByteSize); 2913 if (useSoftFloat()) 2914 CCInfo.PreAnalyzeFormalArguments(Ins); 2915 2916 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 2917 CCInfo.clearWasPPCF128(); 2918 2919 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2920 CCValAssign &VA = ArgLocs[i]; 2921 2922 // Arguments stored in registers. 
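// Each location assigned by CC_PPC32_SVR4 above is either a register or a
// stack slot; aggregates passed by value are analyzed separately below with
// CC_PPC32_SVR4_ByVal.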
2923 if (VA.isRegLoc()) { 2924 const TargetRegisterClass *RC; 2925 EVT ValVT = VA.getValVT(); 2926 2927 switch (ValVT.getSimpleVT().SimpleTy) { 2928 default: 2929 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 2930 case MVT::i1: 2931 case MVT::i32: 2932 RC = &PPC::GPRCRegClass; 2933 break; 2934 case MVT::f32: 2935 if (Subtarget.hasP8Vector()) 2936 RC = &PPC::VSSRCRegClass; 2937 else 2938 RC = &PPC::F4RCRegClass; 2939 break; 2940 case MVT::f64: 2941 if (Subtarget.hasVSX()) 2942 RC = &PPC::VSFRCRegClass; 2943 else 2944 RC = &PPC::F8RCRegClass; 2945 break; 2946 case MVT::v16i8: 2947 case MVT::v8i16: 2948 case MVT::v4i32: 2949 RC = &PPC::VRRCRegClass; 2950 break; 2951 case MVT::v4f32: 2952 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; 2953 break; 2954 case MVT::v2f64: 2955 case MVT::v2i64: 2956 RC = &PPC::VSHRCRegClass; 2957 break; 2958 case MVT::v4f64: 2959 RC = &PPC::QFRCRegClass; 2960 break; 2961 case MVT::v4i1: 2962 RC = &PPC::QBRCRegClass; 2963 break; 2964 } 2965 2966 // Transform the arguments stored in physical registers into virtual ones. 2967 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2968 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, 2969 ValVT == MVT::i1 ? MVT::i32 : ValVT); 2970 2971 if (ValVT == MVT::i1) 2972 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); 2973 2974 InVals.push_back(ArgValue); 2975 } else { 2976 // Argument stored in memory. 2977 assert(VA.isMemLoc()); 2978 2979 unsigned ArgSize = VA.getLocVT().getStoreSize(); 2980 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 2981 isImmutable); 2982 2983 // Create load nodes to retrieve arguments from the stack. 2984 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2985 InVals.push_back( 2986 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); 2987 } 2988 } 2989 2990 // Assign locations to all of the incoming aggregate by value arguments. 2991 // Aggregates passed by value are stored in the local variable space of the 2992 // caller's stack frame, right above the parameter list area. 2993 SmallVector<CCValAssign, 16> ByValArgLocs; 2994 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2995 ByValArgLocs, *DAG.getContext()); 2996 2997 // Reserve stack space for the allocations in CCInfo. 2998 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 2999 3000 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 3001 3002 // Area that is at least reserved in the caller of this function. 3003 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 3004 MinReservedArea = std::max(MinReservedArea, LinkageSize); 3005 3006 // Set the size that is at least reserved in caller of this function. Tail 3007 // call optimized function's reserved stack space needs to be aligned so that 3008 // taking the difference between two stack areas will result in an aligned 3009 // stack. 3010 MinReservedArea = 3011 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3012 FuncInfo->setMinReservedArea(MinReservedArea); 3013 3014 SmallVector<SDValue, 8> MemOps; 3015 3016 // If the function takes variable number of arguments, make a frame index for 3017 // the start of the first vararg value... for expansion of llvm.va_start. 
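// The register-save area created below holds the 8 integer argument
// registers (8 * 4 bytes) plus, unless compiling with soft-float, the 8 FP
// argument registers (8 * 8 bytes), i.e. Depth is 96 bytes (or 32 for
// soft-float).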
3018 if (isVarArg) { 3019 static const MCPhysReg GPArgRegs[] = { 3020 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3021 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3022 }; 3023 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 3024 3025 static const MCPhysReg FPArgRegs[] = { 3026 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 3027 PPC::F8 3028 }; 3029 unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 3030 3031 if (useSoftFloat()) 3032 NumFPArgRegs = 0; 3033 3034 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); 3035 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); 3036 3037 // Make room for NumGPArgRegs and NumFPArgRegs. 3038 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 3039 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; 3040 3041 FuncInfo->setVarArgsStackOffset( 3042 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 3043 CCInfo.getNextStackOffset(), true)); 3044 3045 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 3046 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3047 3048 // The fixed integer arguments of a variadic function are stored to the 3049 // VarArgsFrameIndex on the stack so that they may be loaded by 3050 // dereferencing the result of va_next. 3051 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 3052 // Get an existing live-in vreg, or add a new one. 3053 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 3054 if (!VReg) 3055 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 3056 3057 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3058 SDValue Store = 3059 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3060 MemOps.push_back(Store); 3061 // Increment the address by four for the next argument to store 3062 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 3063 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3064 } 3065 3066 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 3067 // is set. 3068 // The double arguments are stored to the VarArgsFrameIndex 3069 // on the stack. 3070 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 3071 // Get an existing live-in vreg, or add a new one. 3072 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 3073 if (!VReg) 3074 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 3075 3076 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 3077 SDValue Store = 3078 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3079 MemOps.push_back(Store); 3080 // Increment the address by eight for the next argument to store 3081 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, 3082 PtrVT); 3083 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3084 } 3085 } 3086 3087 if (!MemOps.empty()) 3088 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3089 3090 return Chain; 3091 } 3092 3093 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3094 // value to MVT::i64 and then truncate to the correct register size. 
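// For example, an i32 argument marked signext arrives in the low bits of an
// i64 register; it is wrapped in AssertSext (AssertZext for zeroext)
// carrying the original type and then truncated back to i32.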
3095 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, 3096 EVT ObjectVT, SelectionDAG &DAG, 3097 SDValue ArgVal, 3098 const SDLoc &dl) const { 3099 if (Flags.isSExt()) 3100 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 3101 DAG.getValueType(ObjectVT)); 3102 else if (Flags.isZExt()) 3103 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 3104 DAG.getValueType(ObjectVT)); 3105 3106 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); 3107 } 3108 3109 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( 3110 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 3111 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 3112 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3113 // TODO: add description of PPC stack frame format, or at least some docs. 3114 // 3115 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3116 bool isLittleEndian = Subtarget.isLittleEndian(); 3117 MachineFunction &MF = DAG.getMachineFunction(); 3118 MachineFrameInfo *MFI = MF.getFrameInfo(); 3119 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3120 3121 assert(!(CallConv == CallingConv::Fast && isVarArg) && 3122 "fastcc not supported on varargs functions"); 3123 3124 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3125 // Potential tail calls could cause overwriting of argument stack slots. 3126 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3127 (CallConv == CallingConv::Fast)); 3128 unsigned PtrByteSize = 8; 3129 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3130 3131 static const MCPhysReg GPR[] = { 3132 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3133 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3134 }; 3135 static const MCPhysReg VR[] = { 3136 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3137 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3138 }; 3139 static const MCPhysReg VSRH[] = { 3140 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 3141 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 3142 }; 3143 3144 const unsigned Num_GPR_Regs = array_lengthof(GPR); 3145 const unsigned Num_FPR_Regs = 13; 3146 const unsigned Num_VR_Regs = array_lengthof(VR); 3147 const unsigned Num_QFPR_Regs = Num_FPR_Regs; 3148 3149 // Do a first pass over the arguments to determine whether the ABI 3150 // guarantees that our caller has allocated the parameter save area 3151 // on its stack frame. In the ELFv1 ABI, this is always the case; 3152 // in the ELFv2 ABI, it is true if this is a vararg function or if 3153 // any parameter is located in a stack slot. 3154 3155 bool HasParameterArea = !isELFv2ABI || isVarArg; 3156 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; 3157 unsigned NumBytes = LinkageSize; 3158 unsigned AvailableFPRs = Num_FPR_Regs; 3159 unsigned AvailableVRs = Num_VR_Regs; 3160 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 3161 if (Ins[i].Flags.isNest()) 3162 continue; 3163 3164 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, 3165 PtrByteSize, LinkageSize, ParamAreaSize, 3166 NumBytes, AvailableFPRs, AvailableVRs, 3167 Subtarget.hasQPX())) 3168 HasParameterArea = true; 3169 } 3170 3171 // Add DAG nodes to load the arguments or copy them out of registers. On 3172 // entry to a function on PPC, the arguments start after the linkage area, 3173 // although the first ones are often in registers. 
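// ArgOffset tracks the byte offset of the next parameter save area slot
// (starting just past the linkage area), while GPR_idx/FPR_idx/VR_idx count
// how many argument registers of each class have been consumed; QFPR_idx
// aliases FPR_idx because the QPX registers overlap the scalar FPRs.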
3174 3175 unsigned ArgOffset = LinkageSize; 3176 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3177 unsigned &QFPR_idx = FPR_idx; 3178 SmallVector<SDValue, 8> MemOps; 3179 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3180 unsigned CurArgIdx = 0; 3181 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3182 SDValue ArgVal; 3183 bool needsLoad = false; 3184 EVT ObjectVT = Ins[ArgNo].VT; 3185 EVT OrigVT = Ins[ArgNo].ArgVT; 3186 unsigned ObjSize = ObjectVT.getStoreSize(); 3187 unsigned ArgSize = ObjSize; 3188 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3189 if (Ins[ArgNo].isOrigArg()) { 3190 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3191 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3192 } 3193 // We re-align the argument offset for each argument, except when using the 3194 // fast calling convention, when we need to make sure we do that only when 3195 // we'll actually use a stack slot. 3196 unsigned CurArgOffset, Align; 3197 auto ComputeArgOffset = [&]() { 3198 /* Respect alignment of argument on the stack. */ 3199 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); 3200 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 3201 CurArgOffset = ArgOffset; 3202 }; 3203 3204 if (CallConv != CallingConv::Fast) { 3205 ComputeArgOffset(); 3206 3207 /* Compute GPR index associated with argument offset. */ 3208 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3209 GPR_idx = std::min(GPR_idx, Num_GPR_Regs); 3210 } 3211 3212 // FIXME the codegen can be much improved in some cases. 3213 // We do not have to keep everything in memory. 3214 if (Flags.isByVal()) { 3215 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3216 3217 if (CallConv == CallingConv::Fast) 3218 ComputeArgOffset(); 3219 3220 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3221 ObjSize = Flags.getByValSize(); 3222 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3223 // Empty aggregate parameters do not take up registers. Examples: 3224 // struct { } a; 3225 // union { } b; 3226 // int c[0]; 3227 // etc. However, we have to provide a place-holder in InVals, so 3228 // pretend we have an 8-byte item at the current address for that 3229 // purpose. 3230 if (!ObjSize) { 3231 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 3232 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3233 InVals.push_back(FIN); 3234 continue; 3235 } 3236 3237 // Create a stack object covering all stack doublewords occupied 3238 // by the argument. If the argument is (fully or partially) on 3239 // the stack, or if the argument is fully in registers but the 3240 // caller has allocated the parameter save anyway, we can refer 3241 // directly to the caller's stack frame. Otherwise, create a 3242 // local copy in our own frame. 3243 int FI; 3244 if (HasParameterArea || 3245 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) 3246 FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true); 3247 else 3248 FI = MFI->CreateStackObject(ArgSize, Align, false); 3249 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3250 3251 // Handle aggregates smaller than 8 bytes. 3252 if (ObjSize < PtrByteSize) { 3253 // The value of the object is its address, which differs from the 3254 // address of the enclosing doubleword on big-endian systems. 
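// For example, a 3-byte aggregate occupies the last three bytes of its
// doubleword on big-endian targets, so its address is FIN + (8 - 3); on
// little-endian targets it occupies the first three bytes and its address is
// FIN itself.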
3255 SDValue Arg = FIN; 3256 if (!isLittleEndian) { 3257 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); 3258 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); 3259 } 3260 InVals.push_back(Arg); 3261 3262 if (GPR_idx != Num_GPR_Regs) { 3263 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3264 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3265 SDValue Store; 3266 3267 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 3268 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 3269 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 3270 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, 3271 MachinePointerInfo(&*FuncArg), ObjType); 3272 } else { 3273 // For sizes that don't fit a truncating store (3, 5, 6, 7), 3274 // store the whole register as-is to the parameter save area 3275 // slot. 3276 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3277 MachinePointerInfo(&*FuncArg)); 3278 } 3279 3280 MemOps.push_back(Store); 3281 } 3282 // Whether we copied from a register or not, advance the offset 3283 // into the parameter save area by a full doubleword. 3284 ArgOffset += PtrByteSize; 3285 continue; 3286 } 3287 3288 // The value of the object is its address, which is the address of 3289 // its first stack doubleword. 3290 InVals.push_back(FIN); 3291 3292 // Store whatever pieces of the object are in registers to memory. 3293 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3294 if (GPR_idx == Num_GPR_Regs) 3295 break; 3296 3297 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3298 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3299 SDValue Addr = FIN; 3300 if (j) { 3301 SDValue Off = DAG.getConstant(j, dl, PtrVT); 3302 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); 3303 } 3304 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, 3305 MachinePointerInfo(&*FuncArg, j)); 3306 MemOps.push_back(Store); 3307 ++GPR_idx; 3308 } 3309 ArgOffset += ArgSize; 3310 continue; 3311 } 3312 3313 switch (ObjectVT.getSimpleVT().SimpleTy) { 3314 default: llvm_unreachable("Unhandled argument type!"); 3315 case MVT::i1: 3316 case MVT::i32: 3317 case MVT::i64: 3318 if (Flags.isNest()) { 3319 // The 'nest' parameter, if any, is passed in R11. 3320 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); 3321 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3322 3323 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3324 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3325 3326 break; 3327 } 3328 3329 // These can be scalar arguments or elements of an integer array type 3330 // passed directly. Clang may use those instead of "byval" aggregate 3331 // types to avoid forcing arguments to memory unnecessarily. 3332 if (GPR_idx != Num_GPR_Regs) { 3333 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3334 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3335 3336 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3337 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3338 // value to MVT::i64 and then truncate to the correct register size. 
3339 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3340 } else { 3341 if (CallConv == CallingConv::Fast) 3342 ComputeArgOffset(); 3343 3344 needsLoad = true; 3345 ArgSize = PtrByteSize; 3346 } 3347 if (CallConv != CallingConv::Fast || needsLoad) 3348 ArgOffset += 8; 3349 break; 3350 3351 case MVT::f32: 3352 case MVT::f64: 3353 // These can be scalar arguments or elements of a float array type 3354 // passed directly. The latter are used to implement ELFv2 homogenous 3355 // float aggregates. 3356 if (FPR_idx != Num_FPR_Regs) { 3357 unsigned VReg; 3358 3359 if (ObjectVT == MVT::f32) 3360 VReg = MF.addLiveIn(FPR[FPR_idx], 3361 Subtarget.hasP8Vector() 3362 ? &PPC::VSSRCRegClass 3363 : &PPC::F4RCRegClass); 3364 else 3365 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() 3366 ? &PPC::VSFRCRegClass 3367 : &PPC::F8RCRegClass); 3368 3369 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3370 ++FPR_idx; 3371 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { 3372 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 3373 // once we support fp <-> gpr moves. 3374 3375 // This can only ever happen in the presence of f32 array types, 3376 // since otherwise we never run out of FPRs before running out 3377 // of GPRs. 3378 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3379 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3380 3381 if (ObjectVT == MVT::f32) { 3382 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) 3383 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, 3384 DAG.getConstant(32, dl, MVT::i32)); 3385 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 3386 } 3387 3388 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); 3389 } else { 3390 if (CallConv == CallingConv::Fast) 3391 ComputeArgOffset(); 3392 3393 needsLoad = true; 3394 } 3395 3396 // When passing an array of floats, the array occupies consecutive 3397 // space in the argument area; only round up to the next doubleword 3398 // at the end of the array. Otherwise, each float takes 8 bytes. 3399 if (CallConv != CallingConv::Fast || needsLoad) { 3400 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; 3401 ArgOffset += ArgSize; 3402 if (Flags.isInConsecutiveRegsLast()) 3403 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3404 } 3405 break; 3406 case MVT::v4f32: 3407 case MVT::v4i32: 3408 case MVT::v8i16: 3409 case MVT::v16i8: 3410 case MVT::v2f64: 3411 case MVT::v2i64: 3412 case MVT::v1i128: 3413 if (!Subtarget.hasQPX()) { 3414 // These can be scalar arguments or elements of a vector array type 3415 // passed directly. The latter are used to implement ELFv2 homogenous 3416 // vector aggregates. 3417 if (VR_idx != Num_VR_Regs) { 3418 unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? 3419 MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : 3420 MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3421 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3422 ++VR_idx; 3423 } else { 3424 if (CallConv == CallingConv::Fast) 3425 ComputeArgOffset(); 3426 3427 needsLoad = true; 3428 } 3429 if (CallConv != CallingConv::Fast || needsLoad) 3430 ArgOffset += 16; 3431 break; 3432 } // not QPX 3433 3434 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && 3435 "Invalid QPX parameter type"); 3436 /* fall through */ 3437 3438 case MVT::v4f64: 3439 case MVT::v4i1: 3440 // QPX vectors are treated like their scalar floating-point subregisters 3441 // (except that they're larger). 
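// A v4f32 argument takes 16 bytes of the parameter save area here, while
// v4f64 and v4i1 take 32 bytes; Sz below reflects that.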
3442 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32; 3443 if (QFPR_idx != Num_QFPR_Regs) { 3444 const TargetRegisterClass *RC; 3445 switch (ObjectVT.getSimpleVT().SimpleTy) { 3446 case MVT::v4f64: RC = &PPC::QFRCRegClass; break; 3447 case MVT::v4f32: RC = &PPC::QSRCRegClass; break; 3448 default: RC = &PPC::QBRCRegClass; break; 3449 } 3450 3451 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); 3452 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3453 ++QFPR_idx; 3454 } else { 3455 if (CallConv == CallingConv::Fast) 3456 ComputeArgOffset(); 3457 needsLoad = true; 3458 } 3459 if (CallConv != CallingConv::Fast || needsLoad) 3460 ArgOffset += Sz; 3461 break; 3462 } 3463 3464 // We need to load the argument to a virtual register if we determined 3465 // above that we ran out of physical registers of the appropriate type. 3466 if (needsLoad) { 3467 if (ObjSize < ArgSize && !isLittleEndian) 3468 CurArgOffset += ArgSize - ObjSize; 3469 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); 3470 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3471 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); 3472 } 3473 3474 InVals.push_back(ArgVal); 3475 } 3476 3477 // Area that is at least reserved in the caller of this function. 3478 unsigned MinReservedArea; 3479 if (HasParameterArea) 3480 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); 3481 else 3482 MinReservedArea = LinkageSize; 3483 3484 // Set the size that is at least reserved in caller of this function. Tail 3485 // call optimized functions' reserved stack space needs to be aligned so that 3486 // taking the difference between two stack areas will result in an aligned 3487 // stack. 3488 MinReservedArea = 3489 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3490 FuncInfo->setMinReservedArea(MinReservedArea); 3491 3492 // If the function takes variable number of arguments, make a frame index for 3493 // the start of the first vararg value... for expansion of llvm.va_start. 3494 if (isVarArg) { 3495 int Depth = ArgOffset; 3496 3497 FuncInfo->setVarArgsFrameIndex( 3498 MFI->CreateFixedObject(PtrByteSize, Depth, true)); 3499 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3500 3501 // If this function is vararg, store any remaining integer argument regs 3502 // to their spots on the stack so that they may be loaded by dereferencing 3503 // the result of va_next. 3504 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3505 GPR_idx < Num_GPR_Regs; ++GPR_idx) { 3506 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3507 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3508 SDValue Store = 3509 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3510 MemOps.push_back(Store); 3511 // Increment the address by four for the next argument to store 3512 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); 3513 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3514 } 3515 } 3516 3517 if (!MemOps.empty()) 3518 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3519 3520 return Chain; 3521 } 3522 3523 SDValue PPCTargetLowering::LowerFormalArguments_Darwin( 3524 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 3525 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 3526 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3527 // TODO: add description of PPC stack frame format, or at least some docs. 
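// Note: in this lowering every integer and FP argument reserves parameter
// area space even when it is passed in a register, and GPR consumption
// tracks that space (see the ArgOffset/GPR_idx updates below).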
3528 // 3529 MachineFunction &MF = DAG.getMachineFunction(); 3530 MachineFrameInfo *MFI = MF.getFrameInfo(); 3531 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3532 3533 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3534 bool isPPC64 = PtrVT == MVT::i64; 3535 // Potential tail calls could cause overwriting of argument stack slots. 3536 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3537 (CallConv == CallingConv::Fast)); 3538 unsigned PtrByteSize = isPPC64 ? 8 : 4; 3539 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3540 unsigned ArgOffset = LinkageSize; 3541 // Area that is at least reserved in caller of this function. 3542 unsigned MinReservedArea = ArgOffset; 3543 3544 static const MCPhysReg GPR_32[] = { // 32-bit registers. 3545 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3546 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3547 }; 3548 static const MCPhysReg GPR_64[] = { // 64-bit registers. 3549 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3550 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3551 }; 3552 static const MCPhysReg VR[] = { 3553 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3554 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3555 }; 3556 3557 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 3558 const unsigned Num_FPR_Regs = 13; 3559 const unsigned Num_VR_Regs = array_lengthof( VR); 3560 3561 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3562 3563 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 3564 3565 // In 32-bit non-varargs functions, the stack space for vectors is after the 3566 // stack space for non-vectors. We do not use this space unless we have 3567 // too many vectors to fit in registers, something that only occurs in 3568 // constructed examples:), but we have to walk the arglist to figure 3569 // that out...for the pathological case, compute VecArgOffset as the 3570 // start of the vector parameter area. Computing VecArgOffset is the 3571 // entire point of the following loop. 3572 unsigned VecArgOffset = ArgOffset; 3573 if (!isVarArg && !isPPC64) { 3574 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 3575 ++ArgNo) { 3576 EVT ObjectVT = Ins[ArgNo].VT; 3577 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3578 3579 if (Flags.isByVal()) { 3580 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 3581 unsigned ObjSize = Flags.getByValSize(); 3582 unsigned ArgSize = 3583 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3584 VecArgOffset += ArgSize; 3585 continue; 3586 } 3587 3588 switch(ObjectVT.getSimpleVT().SimpleTy) { 3589 default: llvm_unreachable("Unhandled argument type!"); 3590 case MVT::i1: 3591 case MVT::i32: 3592 case MVT::f32: 3593 VecArgOffset += 4; 3594 break; 3595 case MVT::i64: // PPC64 3596 case MVT::f64: 3597 // FIXME: We are guaranteed to be !isPPC64 at this point. 3598 // Does MVT::i64 apply? 3599 VecArgOffset += 8; 3600 break; 3601 case MVT::v4f32: 3602 case MVT::v4i32: 3603 case MVT::v8i16: 3604 case MVT::v16i8: 3605 // Nothing to do, we're only looking at Nonvector args here. 3606 break; 3607 } 3608 } 3609 } 3610 // We've found where the vector parameter area in memory is. Skip the 3611 // first 12 parameters; these don't use that memory. 3612 VecArgOffset = ((VecArgOffset+15)/16)*16; 3613 VecArgOffset += 12*16; 3614 3615 // Add DAG nodes to load the arguments or copy them out of registers. On 3616 // entry to a function on PPC, the arguments start after the linkage area, 3617 // although the first ones are often in registers. 
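// nAltivecParamsAtEnd counts vector arguments of 32-bit non-varargs
// functions; their parameter area space is appended after all of the
// non-vector arguments (see the MinReservedArea fixup at the end).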
3618 3619 SmallVector<SDValue, 8> MemOps; 3620 unsigned nAltivecParamsAtEnd = 0; 3621 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3622 unsigned CurArgIdx = 0; 3623 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3624 SDValue ArgVal; 3625 bool needsLoad = false; 3626 EVT ObjectVT = Ins[ArgNo].VT; 3627 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 3628 unsigned ArgSize = ObjSize; 3629 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3630 if (Ins[ArgNo].isOrigArg()) { 3631 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3632 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3633 } 3634 unsigned CurArgOffset = ArgOffset; 3635 3636 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 3637 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 3638 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 3639 if (isVarArg || isPPC64) { 3640 MinReservedArea = ((MinReservedArea+15)/16)*16; 3641 MinReservedArea += CalculateStackSlotSize(ObjectVT, 3642 Flags, 3643 PtrByteSize); 3644 } else nAltivecParamsAtEnd++; 3645 } else 3646 // Calculate min reserved area. 3647 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 3648 Flags, 3649 PtrByteSize); 3650 3651 // FIXME the codegen can be much improved in some cases. 3652 // We do not have to keep everything in memory. 3653 if (Flags.isByVal()) { 3654 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3655 3656 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3657 ObjSize = Flags.getByValSize(); 3658 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3659 // Objects of size 1 and 2 are right justified, everything else is 3660 // left justified. This means the memory address is adjusted forwards. 3661 if (ObjSize==1 || ObjSize==2) { 3662 CurArgOffset = CurArgOffset + (4 - ObjSize); 3663 } 3664 // The value of the object is its address. 3665 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); 3666 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3667 InVals.push_back(FIN); 3668 if (ObjSize==1 || ObjSize==2) { 3669 if (GPR_idx != Num_GPR_Regs) { 3670 unsigned VReg; 3671 if (isPPC64) 3672 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3673 else 3674 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3675 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3676 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 3677 SDValue Store = 3678 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 3679 MachinePointerInfo(&*FuncArg), ObjType); 3680 MemOps.push_back(Store); 3681 ++GPR_idx; 3682 } 3683 3684 ArgOffset += PtrByteSize; 3685 3686 continue; 3687 } 3688 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3689 // Store whatever pieces of the object are in registers 3690 // to memory. ArgOffset will be the address of the beginning 3691 // of the object. 
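// Each iteration spills one pointer-sized piece of the byval aggregate from
// its GPR into a fixed stack object at ArgOffset; once the GPRs run out, the
// remaining pieces are assumed to already be in memory in the caller's
// parameter area.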
3692 if (GPR_idx != Num_GPR_Regs) { 3693 unsigned VReg; 3694 if (isPPC64) 3695 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3696 else 3697 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3698 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 3699 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3700 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3701 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3702 MachinePointerInfo(&*FuncArg, j)); 3703 MemOps.push_back(Store); 3704 ++GPR_idx; 3705 ArgOffset += PtrByteSize; 3706 } else { 3707 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 3708 break; 3709 } 3710 } 3711 continue; 3712 } 3713 3714 switch (ObjectVT.getSimpleVT().SimpleTy) { 3715 default: llvm_unreachable("Unhandled argument type!"); 3716 case MVT::i1: 3717 case MVT::i32: 3718 if (!isPPC64) { 3719 if (GPR_idx != Num_GPR_Regs) { 3720 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3721 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 3722 3723 if (ObjectVT == MVT::i1) 3724 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 3725 3726 ++GPR_idx; 3727 } else { 3728 needsLoad = true; 3729 ArgSize = PtrByteSize; 3730 } 3731 // All int arguments reserve stack space in the Darwin ABI. 3732 ArgOffset += PtrByteSize; 3733 break; 3734 } 3735 // FALLTHROUGH 3736 case MVT::i64: // PPC64 3737 if (GPR_idx != Num_GPR_Regs) { 3738 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3739 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3740 3741 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3742 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3743 // value to MVT::i64 and then truncate to the correct register size. 3744 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3745 3746 ++GPR_idx; 3747 } else { 3748 needsLoad = true; 3749 ArgSize = PtrByteSize; 3750 } 3751 // All int arguments reserve stack space in the Darwin ABI. 3752 ArgOffset += 8; 3753 break; 3754 3755 case MVT::f32: 3756 case MVT::f64: 3757 // Every 4 bytes of argument space consumes one of the GPRs available for 3758 // argument passing. 3759 if (GPR_idx != Num_GPR_Regs) { 3760 ++GPR_idx; 3761 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 3762 ++GPR_idx; 3763 } 3764 if (FPR_idx != Num_FPR_Regs) { 3765 unsigned VReg; 3766 3767 if (ObjectVT == MVT::f32) 3768 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 3769 else 3770 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 3771 3772 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3773 ++FPR_idx; 3774 } else { 3775 needsLoad = true; 3776 } 3777 3778 // All FP arguments reserve stack space in the Darwin ABI. 3779 ArgOffset += isPPC64 ? 8 : ObjSize; 3780 break; 3781 case MVT::v4f32: 3782 case MVT::v4i32: 3783 case MVT::v8i16: 3784 case MVT::v16i8: 3785 // Note that vector arguments in registers don't reserve stack space, 3786 // except in varargs functions. 3787 if (VR_idx != Num_VR_Regs) { 3788 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3789 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3790 if (isVarArg) { 3791 while ((ArgOffset % 16) != 0) { 3792 ArgOffset += PtrByteSize; 3793 if (GPR_idx != Num_GPR_Regs) 3794 GPR_idx++; 3795 } 3796 ArgOffset += 16; 3797 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 3798 } 3799 ++VR_idx; 3800 } else { 3801 if (!isVarArg && !isPPC64) { 3802 // Vectors go after all the nonvectors. 
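// Use the vector parameter area computed earlier (VecArgOffset) rather than
// the next slot in the ordinary parameter area.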
3803 CurArgOffset = VecArgOffset; 3804 VecArgOffset += 16; 3805 } else { 3806 // Vectors are aligned. 3807 ArgOffset = ((ArgOffset+15)/16)*16; 3808 CurArgOffset = ArgOffset; 3809 ArgOffset += 16; 3810 } 3811 needsLoad = true; 3812 } 3813 break; 3814 } 3815 3816 // We need to load the argument to a virtual register if we determined above 3817 // that we ran out of physical registers of the appropriate type. 3818 if (needsLoad) { 3819 int FI = MFI->CreateFixedObject(ObjSize, 3820 CurArgOffset + (ArgSize - ObjSize), 3821 isImmutable); 3822 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3823 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); 3824 } 3825 3826 InVals.push_back(ArgVal); 3827 } 3828 3829 // Allow for Altivec parameters at the end, if needed. 3830 if (nAltivecParamsAtEnd) { 3831 MinReservedArea = ((MinReservedArea+15)/16)*16; 3832 MinReservedArea += 16*nAltivecParamsAtEnd; 3833 } 3834 3835 // Area that is at least reserved in the caller of this function. 3836 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); 3837 3838 // Set the size that is at least reserved in caller of this function. Tail 3839 // call optimized functions' reserved stack space needs to be aligned so that 3840 // taking the difference between two stack areas will result in an aligned 3841 // stack. 3842 MinReservedArea = 3843 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3844 FuncInfo->setMinReservedArea(MinReservedArea); 3845 3846 // If the function takes variable number of arguments, make a frame index for 3847 // the start of the first vararg value... for expansion of llvm.va_start. 3848 if (isVarArg) { 3849 int Depth = ArgOffset; 3850 3851 FuncInfo->setVarArgsFrameIndex( 3852 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 3853 Depth, true)); 3854 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3855 3856 // If this function is vararg, store any remaining integer argument regs 3857 // to their spots on the stack so that they may be loaded by dereferencing 3858 // the result of va_next. 3859 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 3860 unsigned VReg; 3861 3862 if (isPPC64) 3863 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3864 else 3865 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3866 3867 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3868 SDValue Store = 3869 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3870 MemOps.push_back(Store); 3871 // Increment the address by four for the next argument to store 3872 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 3873 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3874 } 3875 } 3876 3877 if (!MemOps.empty()) 3878 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3879 3880 return Chain; 3881 } 3882 3883 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 3884 /// adjusted to accommodate the arguments for the tailcall. 3885 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 3886 unsigned ParamSize) { 3887 3888 if (!isTailCall) return 0; 3889 3890 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 3891 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 3892 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 3893 // Remember only if the new adjustement is bigger. 
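// For example, if the caller reserved 64 bytes but this tail call needs 112
// bytes of parameter area, SPDiff is -48; the most negative adjustment seen
// so far is what is remembered in TailCallSPDelta.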
3894 if (SPDiff < FI->getTailCallSPDelta())
3895 FI->setTailCallSPDelta(SPDiff);
3896
3897 return SPDiff;
3898 }
3899
3900 static bool isFunctionGlobalAddress(SDValue Callee);
3901
3902 static bool
3903 resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
3904 // If !G, Callee can be an external symbol.
3905 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3906 if (!G) return false;
3907
3908 const GlobalValue *GV = G->getGlobal();
3909
3910 if (GV->isDeclaration()) return false;
3911
3912 switch(GV->getLinkage()) {
3913 default: llvm_unreachable("unknown linkage type");
3914 case GlobalValue::AvailableExternallyLinkage:
3915 case GlobalValue::ExternalWeakLinkage:
3916 return false;
3917
3918 // A callee with weak linkage is allowed if it has hidden or protected
3919 // visibility.
3920 case GlobalValue::LinkOnceAnyLinkage:
3921 case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
3922 case GlobalValue::WeakAnyLinkage:
3923 case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
3924 if (GV->hasDefaultVisibility())
3925 return false;
3926
3927 case GlobalValue::ExternalLinkage:
3928 case GlobalValue::InternalLinkage:
3929 case GlobalValue::PrivateLinkage:
3930 break;
3931 }
3932
3933 // With '-fPIC', a call to a default-visibility function needs a 'nop'
3934 // inserted after the call, regardless of whether the callee resides in the
3935 // same module, so we treat it as if it were in a different module.
3936 if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
3937 return false;
3938
3939 return true;
3940 }
3941
3942 static bool
3943 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
3944 const SmallVectorImpl<ISD::OutputArg> &Outs) {
3945 assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
3946
3947 const unsigned PtrByteSize = 8;
3948 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3949
3950 static const MCPhysReg GPR[] = {
3951 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3952 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3953 };
3954 static const MCPhysReg VR[] = {
3955 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3956 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3957 };
3958
3959 const unsigned NumGPRs = array_lengthof(GPR);
3960 const unsigned NumFPRs = 13;
3961 const unsigned NumVRs = array_lengthof(VR);
3962 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
3963
3964 unsigned NumBytes = LinkageSize;
3965 unsigned AvailableFPRs = NumFPRs;
3966 unsigned AvailableVRs = NumVRs;
3967
3968 for (const ISD::OutputArg& Param : Outs) {
3969 if (Param.Flags.isNest()) continue;
3970
3971 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
3972 PtrByteSize, LinkageSize, ParamAreaSize,
3973 NumBytes, AvailableFPRs, AvailableVRs,
3974 Subtarget.hasQPX()))
3975 return true;
3976 }
3977 return false;
3978 }
3979
3980 static bool
3981 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
3982 if (CS->arg_size() != CallerFn->getArgumentList().size())
3983 return false;
3984
3985 ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
3986 ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
3987 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
3988
3989 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
3990 const Value* CalleeArg = *CalleeArgIter;
3991 const Value* CallerArg = &(*CallerArgIter);
3992 if (CalleeArg == CallerArg)
3993 continue;
3994
3995 // e.g.
// @caller([4 x i64] %a, [4 x i64] %b) {
3996 // tail call @callee([4 x i64] undef, [4 x i64] %b)
3997 // }
3998 // The callee's 1st argument is undef and has the same type as the caller's.
3999 if (CalleeArg->getType() == CallerArg->getType() &&
4000 isa<UndefValue>(CalleeArg))
4001 continue;
4002
4003 return false;
4004 }
4005
4006 return true;
4007 }
4008
4009 bool
4010 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4011 SDValue Callee,
4012 CallingConv::ID CalleeCC,
4013 ImmutableCallSite *CS,
4014 bool isVarArg,
4015 const SmallVectorImpl<ISD::OutputArg> &Outs,
4016 const SmallVectorImpl<ISD::InputArg> &Ins,
4017 SelectionDAG& DAG) const {
4018 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4019
4020 if (DisableSCO && !TailCallOpt) return false;
4021
4022 // Variadic argument functions are not supported.
4023 if (isVarArg) return false;
4024
4025 MachineFunction &MF = DAG.getMachineFunction();
4026 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4027
4028 // Tail or sibling call optimization (TCO/SCO) requires that the callee and
4029 // the caller use the same calling convention.
4030 if (CallerCC != CalleeCC) return false;
4031
4032 // SCO only supports the C and Fast calling conventions.
4033 if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
4034 return false;
4035
4036 // Functions containing by val parameters are not supported.
4037 if (std::any_of(Ins.begin(), Ins.end(),
4038 [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
4039 return false;
4040
4041 // No TCO/SCO on indirect calls because the caller has to restore its TOC.
4042 if (!isFunctionGlobalAddress(Callee) &&
4043 !isa<ExternalSymbolSDNode>(Callee))
4044 return false;
4045
4046 // Check if the callee resides in the same module, because for now the PPC64
4047 // SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides
4048 // in another module.
4049 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4050 if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
4051 return false;
4052
4053 // TCO allows altering the callee's ABI, so we don't have to check further.
4054 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4055 return true;
4056
4057 if (DisableSCO) return false;
4058
4059 // If the callee uses the same argument list as the caller, we can apply SCO
4060 // here. Otherwise, we need to check whether the callee needs stack slots for
4061 // passing its arguments.
4062 if (!hasSameArgumentList(MF.getFunction(), CS) &&
4063 needStackSlotPassParameters(Subtarget, Outs)) {
4064 return false;
4065 }
4066
4067 return true;
4068 }
4069
4070 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4071 /// for tail call optimization. Targets which want to do tail call
4072 /// optimization should implement this function.
4073 bool
4074 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4075 CallingConv::ID CalleeCC,
4076 bool isVarArg,
4077 const SmallVectorImpl<ISD::InputArg> &Ins,
4078 SelectionDAG& DAG) const {
4079 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4080 return false;
4081
4082 // Variable argument functions are not supported.
4083 if (isVarArg)
4084 return false;
4085
4086 MachineFunction &MF = DAG.getMachineFunction();
4087 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4088 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4089 // Functions containing by val parameters are not supported.
4090 for (unsigned i = 0; i != Ins.size(); i++) { 4091 ISD::ArgFlagsTy Flags = Ins[i].Flags; 4092 if (Flags.isByVal()) return false; 4093 } 4094 4095 // Non-PIC/GOT tail calls are supported. 4096 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 4097 return true; 4098 4099 // At the moment we can only do local tail calls (in same module, hidden 4100 // or protected) if we are generating PIC. 4101 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4102 return G->getGlobal()->hasHiddenVisibility() 4103 || G->getGlobal()->hasProtectedVisibility(); 4104 } 4105 4106 return false; 4107 } 4108 4109 /// isCallCompatibleAddress - Return the immediate to use if the specified 4110 /// 32-bit value is representable in the immediate field of a BxA instruction. 4111 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 4112 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 4113 if (!C) return nullptr; 4114 4115 int Addr = C->getZExtValue(); 4116 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 4117 SignExtend32<26>(Addr) != Addr) 4118 return nullptr; // Top 6 bits have to be sext of immediate. 4119 4120 return DAG 4121 .getConstant( 4122 (int)C->getZExtValue() >> 2, SDLoc(Op), 4123 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) 4124 .getNode(); 4125 } 4126 4127 namespace { 4128 4129 struct TailCallArgumentInfo { 4130 SDValue Arg; 4131 SDValue FrameIdxOp; 4132 int FrameIdx; 4133 4134 TailCallArgumentInfo() : FrameIdx(0) {} 4135 }; 4136 } 4137 4138 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 4139 static void StoreTailCallArgumentsToStackSlot( 4140 SelectionDAG &DAG, SDValue Chain, 4141 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 4142 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) { 4143 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 4144 SDValue Arg = TailCallArgs[i].Arg; 4145 SDValue FIN = TailCallArgs[i].FrameIdxOp; 4146 int FI = TailCallArgs[i].FrameIdx; 4147 // Store relative to framepointer. 4148 MemOpChains.push_back(DAG.getStore( 4149 Chain, dl, Arg, FIN, 4150 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); 4151 } 4152 } 4153 4154 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 4155 /// the appropriate stack slot for the tail call optimized function call. 4156 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, 4157 SDValue OldRetAddr, SDValue OldFP, 4158 int SPDiff, const SDLoc &dl) { 4159 if (SPDiff) { 4160 // Calculate the new stack slot for the return address. 4161 MachineFunction &MF = DAG.getMachineFunction(); 4162 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); 4163 const PPCFrameLowering *FL = Subtarget.getFrameLowering(); 4164 bool isPPC64 = Subtarget.isPPC64(); 4165 int SlotSize = isPPC64 ? 8 : 4; 4166 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); 4167 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 4168 NewRetAddrLoc, true); 4169 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 4170 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 4171 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 4172 MachinePointerInfo::getFixedStack(MF, NewRetAddr)); 4173 4174 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 4175 // slot as the FP is never overwritten. 
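// Only the Darwin ABI needs the frame pointer save slot relocated here.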
4176 if (Subtarget.isDarwinABI()) { 4177 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); 4178 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 4179 true); 4180 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 4181 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 4182 MachinePointerInfo::getFixedStack( 4183 DAG.getMachineFunction(), NewFPIdx)); 4184 } 4185 } 4186 return Chain; 4187 } 4188 4189 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 4190 /// the position of the argument. 4191 static void 4192 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 4193 SDValue Arg, int SPDiff, unsigned ArgOffset, 4194 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 4195 int Offset = ArgOffset + SPDiff; 4196 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 4197 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 4198 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 4199 SDValue FIN = DAG.getFrameIndex(FI, VT); 4200 TailCallArgumentInfo Info; 4201 Info.Arg = Arg; 4202 Info.FrameIdxOp = FIN; 4203 Info.FrameIdx = FI; 4204 TailCallArguments.push_back(Info); 4205 } 4206 4207 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 4208 /// stack slot. Returns the chain as result and the loaded frame pointers in 4209 /// LROpOut/FPOpout. Used when tail calling. 4210 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( 4211 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, 4212 SDValue &FPOpOut, const SDLoc &dl) const { 4213 if (SPDiff) { 4214 // Load the LR and FP stack slot for later adjusting. 4215 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 4216 LROpOut = getReturnAddrFrameIndex(DAG); 4217 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); 4218 Chain = SDValue(LROpOut.getNode(), 1); 4219 4220 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 4221 // slot as the FP is never overwritten. 4222 if (Subtarget.isDarwinABI()) { 4223 FPOpOut = getFramePointerFrameIndex(DAG); 4224 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); 4225 Chain = SDValue(FPOpOut.getNode(), 1); 4226 } 4227 } 4228 return Chain; 4229 } 4230 4231 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 4232 /// by "Src" to address "Dst" of size "Size". Alignment information is 4233 /// specified by the specific parameter attribute. The copy will be passed as 4234 /// a byval function parameter. 4235 /// Sometimes what we are copying is the end of a larger object, the part that 4236 /// does not fit in registers. 4237 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, 4238 SDValue Chain, ISD::ArgFlagsTy Flags, 4239 SelectionDAG &DAG, const SDLoc &dl) { 4240 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); 4241 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 4242 false, false, false, MachinePointerInfo(), 4243 MachinePointerInfo()); 4244 } 4245 4246 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 4247 /// tail calls. 
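/// For non-tail calls the store is emitted immediately at PtrOff (recomputed
/// relative to the stack pointer for vector arguments); for tail calls the
/// store is deferred and the destination slot is recorded in
/// TailCallArguments instead.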
4248 static void LowerMemOpCallTo( 4249 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, 4250 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, 4251 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 4252 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) { 4253 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4254 if (!isTailCall) { 4255 if (isVector) { 4256 SDValue StackPtr; 4257 if (isPPC64) 4258 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4259 else 4260 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4261 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4262 DAG.getConstant(ArgOffset, dl, PtrVT)); 4263 } 4264 MemOpChains.push_back( 4265 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 4266 // Calculate and remember argument location. 4267 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 4268 TailCallArguments); 4269 } 4270 4271 static void 4272 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 4273 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, 4274 SDValue FPOp, 4275 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 4276 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 4277 // might overwrite each other in case of tail call optimization. 4278 SmallVector<SDValue, 8> MemOpChains2; 4279 // Do not flag preceding copytoreg stuff together with the following stuff. 4280 InFlag = SDValue(); 4281 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 4282 MemOpChains2, dl); 4283 if (!MemOpChains2.empty()) 4284 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 4285 4286 // Store the return address to the appropriate stack slot. 4287 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); 4288 4289 // Emit callseq_end just before tailcall node. 4290 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4291 DAG.getIntPtrConstant(0, dl, true), InFlag, dl); 4292 InFlag = Chain.getValue(1); 4293 } 4294 4295 // Is this global address that of a function that can be called by name? (as 4296 // opposed to something that must hold a descriptor for an indirect call). 4297 static bool isFunctionGlobalAddress(SDValue Callee) { 4298 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4299 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 4300 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 4301 return false; 4302 4303 return G->getGlobal()->getValueType()->isFunctionTy(); 4304 } 4305 4306 return false; 4307 } 4308 4309 static unsigned 4310 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, 4311 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, 4312 bool isPatchPoint, bool hasNest, 4313 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, 4314 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 4315 ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { 4316 4317 bool isPPC64 = Subtarget.isPPC64(); 4318 bool isSVR4ABI = Subtarget.isSVR4ABI(); 4319 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4320 4321 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4322 NodeTys.push_back(MVT::Other); // Returns a chain 4323 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 
4324 4325 unsigned CallOpc = PPCISD::CALL; 4326 4327 bool needIndirectCall = true; 4328 if (!isSVR4ABI || !isPPC64) 4329 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 4330 // If this is an absolute destination address, use the munged value. 4331 Callee = SDValue(Dest, 0); 4332 needIndirectCall = false; 4333 } 4334 4335 // PC-relative references to external symbols should go through $stub, unless 4336 // we're building with the leopard linker or later, which automatically 4337 // synthesizes these stubs. 4338 const TargetMachine &TM = DAG.getTarget(); 4339 const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); 4340 const GlobalValue *GV = nullptr; 4341 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4342 GV = G->getGlobal(); 4343 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); 4344 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; 4345 4346 if (isFunctionGlobalAddress(Callee)) { 4347 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 4348 // A call to a TLS address is actually an indirect call to a 4349 // thread-specific pointer. 4350 unsigned OpFlags = 0; 4351 if (UsePlt) 4352 OpFlags = PPCII::MO_PLT; 4353 4354 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 4355 // every direct call is) turn it into a TargetGlobalAddress / 4356 // TargetExternalSymbol node so that legalize doesn't hack it. 4357 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 4358 Callee.getValueType(), 0, OpFlags); 4359 needIndirectCall = false; 4360 } 4361 4362 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4363 unsigned char OpFlags = 0; 4364 4365 if (UsePlt) 4366 OpFlags = PPCII::MO_PLT; 4367 4368 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 4369 OpFlags); 4370 needIndirectCall = false; 4371 } 4372 4373 if (isPatchPoint) { 4374 // We'll form an invalid direct call when lowering a patchpoint; the full 4375 // sequence for an indirect call is complicated, and many of the 4376 // instructions introduced might have side effects (and, thus, can't be 4377 // removed later). The call itself will be removed as soon as the 4378 // argument/return lowering is complete, so the fact that it has the wrong 4379 // kind of operands should not really matter. 4380 needIndirectCall = false; 4381 } 4382 4383 if (needIndirectCall) { 4384 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 4385 // to do the call, we can't use PPCISD::CALL. 4386 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 4387 4388 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 4389 // Function pointers in the 64-bit SVR4 ABI do not point to the function 4390 // entry point, but to the function descriptor (the function entry point 4391 // address is part of the function descriptor though). 4392 // The function descriptor is a three doubleword structure with the 4393 // following fields: function entry point, TOC base address and 4394 // environment pointer. 4395 // Thus for a call through a function pointer, the following actions need 4396 // to be performed: 4397 // 1. Save the TOC of the caller in the TOC save area of its stack 4398 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 4399 // 2. Load the address of the function entry point from the function 4400 // descriptor. 4401 // 3. Load the TOC of the callee from the function descriptor into r2. 4402 // 4. Load the environment pointer from the function descriptor into 4403 // r11. 4404 // 5. Branch to the function entry point address. 
4405 // 6. On return of the callee, the TOC of the caller needs to be 4406 // restored (this is done in FinishCall()). 4407 // 4408 // The loads are scheduled at the beginning of the call sequence, and the 4409 // register copies are flagged together to ensure that no other 4410 // operations can be scheduled in between. E.g. without flagging the 4411 // copies together, a TOC access in the caller could be scheduled between 4412 // the assignment of the callee TOC and the branch to the callee, which 4413 // results in the TOC access going through the TOC of the callee instead 4414 // of going through the TOC of the caller, which leads to incorrect code. 4415 4416 // Load the address of the function entry point from the function 4417 // descriptor. 4418 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); 4419 if (LDChain.getValueType() == MVT::Glue) 4420 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); 4421 4422 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() 4423 ? MachineMemOperand::MOInvariant 4424 : MachineMemOperand::MONone; 4425 4426 MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr); 4427 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, 4428 /* Alignment = */ 8, MMOFlags); 4429 4430 // Load environment pointer into r11. 4431 SDValue PtrOff = DAG.getIntPtrConstant(16, dl); 4432 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 4433 SDValue LoadEnvPtr = 4434 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), 4435 /* Alignment = */ 8, MMOFlags); 4436 4437 SDValue TOCOff = DAG.getIntPtrConstant(8, dl); 4438 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 4439 SDValue TOCPtr = 4440 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), 4441 /* Alignment = */ 8, MMOFlags); 4442 4443 setUsesTOCBasePtr(DAG); 4444 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, 4445 InFlag); 4446 Chain = TOCVal.getValue(0); 4447 InFlag = TOCVal.getValue(1); 4448 4449 // If the function call has an explicit 'nest' parameter, it takes the 4450 // place of the environment pointer. 4451 if (!hasNest) { 4452 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 4453 InFlag); 4454 4455 Chain = EnvVal.getValue(0); 4456 InFlag = EnvVal.getValue(1); 4457 } 4458 4459 MTCTROps[0] = Chain; 4460 MTCTROps[1] = LoadFuncPtr; 4461 MTCTROps[2] = InFlag; 4462 } 4463 4464 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 4465 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 4466 InFlag = Chain.getValue(1); 4467 4468 NodeTys.clear(); 4469 NodeTys.push_back(MVT::Other); 4470 NodeTys.push_back(MVT::Glue); 4471 Ops.push_back(Chain); 4472 CallOpc = PPCISD::BCTRL; 4473 Callee.setNode(nullptr); 4474 // Add use of X11 (holding environment pointer) 4475 if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) 4476 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 4477 // Add CTR register as callee so a bctr can be emitted later. 4478 if (isTailCall) 4479 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 4480 } 4481 4482 // If this is a direct call, pass the chain and the callee. 4483 if (Callee.getNode()) { 4484 Ops.push_back(Chain); 4485 Ops.push_back(Callee); 4486 } 4487 // If this is a tail call add stack pointer delta. 4488 if (isTailCall) 4489 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); 4490 4491 // Add argument registers to the end of the list so that they are known live 4492 // into the call. 
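// Each entry of RegsToPass becomes an explicit register operand of the call
// node, so those physical registers are treated as live at the call.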
4493 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 4494 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 4495 RegsToPass[i].second.getValueType())); 4496 4497 // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live 4498 // into the call. 4499 if (isSVR4ABI && isPPC64 && !isPatchPoint) { 4500 setUsesTOCBasePtr(DAG); 4501 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 4502 } 4503 4504 return CallOpc; 4505 } 4506 4507 static 4508 bool isLocalCall(const SDValue &Callee) 4509 { 4510 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4511 return G->getGlobal()->isStrongDefinitionForLinker(); 4512 return false; 4513 } 4514 4515 SDValue PPCTargetLowering::LowerCallResult( 4516 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, 4517 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 4518 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 4519 4520 SmallVector<CCValAssign, 16> RVLocs; 4521 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 4522 *DAG.getContext()); 4523 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 4524 4525 // Copy all of the result registers out of their specified physreg. 4526 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 4527 CCValAssign &VA = RVLocs[i]; 4528 assert(VA.isRegLoc() && "Can only return in registers!"); 4529 4530 SDValue Val = DAG.getCopyFromReg(Chain, dl, 4531 VA.getLocReg(), VA.getLocVT(), InFlag); 4532 Chain = Val.getValue(1); 4533 InFlag = Val.getValue(2); 4534 4535 switch (VA.getLocInfo()) { 4536 default: llvm_unreachable("Unknown loc info!"); 4537 case CCValAssign::Full: break; 4538 case CCValAssign::AExt: 4539 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4540 break; 4541 case CCValAssign::ZExt: 4542 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 4543 DAG.getValueType(VA.getValVT())); 4544 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4545 break; 4546 case CCValAssign::SExt: 4547 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 4548 DAG.getValueType(VA.getValVT())); 4549 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4550 break; 4551 } 4552 4553 InVals.push_back(Val); 4554 } 4555 4556 return Chain; 4557 } 4558 4559 SDValue PPCTargetLowering::FinishCall( 4560 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, 4561 bool isPatchPoint, bool hasNest, SelectionDAG &DAG, 4562 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, 4563 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, 4564 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, 4565 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { 4566 4567 std::vector<EVT> NodeTys; 4568 SmallVector<SDValue, 8> Ops; 4569 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, 4570 SPDiff, isTailCall, isPatchPoint, hasNest, 4571 RegsToPass, Ops, NodeTys, CS, Subtarget); 4572 4573 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 4574 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 4575 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 4576 4577 // When performing tail call optimization the callee pops its arguments off 4578 // the stack. Account for this here so these bytes can be pushed back on in 4579 // PPCFrameLowering::eliminateCallFramePseudoInstr. 4580 int BytesCalleePops = 4581 (CallConv == CallingConv::Fast && 4582 getTargetMachine().Options.GuaranteedTailCallOpt) ? 
NumBytes : 0;
4583
4584 // Add a register mask operand representing the call-preserved registers.
4585 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4586 const uint32_t *Mask =
4587 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4588 assert(Mask && "Missing call preserved mask for calling convention");
4589 Ops.push_back(DAG.getRegisterMask(Mask));
4590
4591 if (InFlag.getNode())
4592 Ops.push_back(InFlag);
4593
4594 // Emit tail call.
4595 if (isTailCall) {
4596 assert(((Callee.getOpcode() == ISD::Register &&
4597 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4598 Callee.getOpcode() == ISD::TargetExternalSymbol ||
4599 Callee.getOpcode() == ISD::TargetGlobalAddress ||
4600 isa<ConstantSDNode>(Callee)) &&
4601 "Expecting a global address, external symbol, absolute value or register");
4602
4603 DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
4604 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4605 }
4606
4607 // Add a NOP immediately after the branch instruction when using the 64-bit
4608 // SVR4 ABI. At link time, if caller and callee are in a different module and
4609 // thus have a different TOC, the call will be replaced with a call to a stub
4610 // function which saves the current TOC, loads the TOC of the callee and
4611 // branches to the callee. The NOP will be replaced with a load instruction
4612 // which restores the TOC of the caller from the TOC save slot of the current
4613 // stack frame. If caller and callee belong to the same module (and have the
4614 // same TOC), the NOP will remain unchanged.
4615
4616 if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
4617 !isPatchPoint) {
4618 if (CallOpc == PPCISD::BCTRL) {
4619 // This is a call through a function pointer.
4620 // Restore the caller TOC from the save area into R2.
4621 // See PrepareCall() for more information about calls through function
4622 // pointers in the 64-bit SVR4 ABI.
4623 // We are using a target-specific load with r2 hard coded, because the
4624 // result of a target-independent load would never go directly into r2,
4625 // since r2 is a reserved register (which prevents the register allocator
4626 // from allocating it), resulting in an additional register being
4627 // allocated and an unnecessary move instruction being generated.
4628 CallOpc = PPCISD::BCTRL_LOAD_TOC;
4629
4630 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4631 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4632 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4633 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4634 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4635
4636 // The address needs to go after the chain input but before the flag (or
4637 // any other variadic arguments).
4638 Ops.insert(std::next(Ops.begin()), AddTOC);
4639 } else if ((CallOpc == PPCISD::CALL) &&
4640 (!isLocalCall(Callee) ||
4641 DAG.getTarget().getRelocationModel() == Reloc::PIC_))
4642 // Otherwise insert NOP for non-local calls.
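// (The linker may later rewrite this NOP into a TOC-restore load, as
// described in the block comment above.)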
4643 CallOpc = PPCISD::CALL_NOP; 4644 } 4645 4646 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 4647 InFlag = Chain.getValue(1); 4648 4649 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4650 DAG.getIntPtrConstant(BytesCalleePops, dl, true), 4651 InFlag, dl); 4652 if (!Ins.empty()) 4653 InFlag = Chain.getValue(1); 4654 4655 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 4656 Ins, dl, DAG, InVals); 4657 } 4658 4659 SDValue 4660 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 4661 SmallVectorImpl<SDValue> &InVals) const { 4662 SelectionDAG &DAG = CLI.DAG; 4663 SDLoc &dl = CLI.DL; 4664 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4665 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4666 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4667 SDValue Chain = CLI.Chain; 4668 SDValue Callee = CLI.Callee; 4669 bool &isTailCall = CLI.IsTailCall; 4670 CallingConv::ID CallConv = CLI.CallConv; 4671 bool isVarArg = CLI.IsVarArg; 4672 bool isPatchPoint = CLI.IsPatchPoint; 4673 ImmutableCallSite *CS = CLI.CS; 4674 4675 if (isTailCall) { 4676 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) 4677 isTailCall = 4678 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, 4679 isVarArg, Outs, Ins, DAG); 4680 else 4681 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 4682 Ins, DAG); 4683 if (isTailCall) { 4684 ++NumTailCalls; 4685 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 4686 ++NumSiblingCalls; 4687 4688 assert(isa<GlobalAddressSDNode>(Callee) && 4689 "Callee should be an llvm::Function object."); 4690 DEBUG( 4691 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); 4692 const unsigned Width = 80 - strlen("TCO caller: ") 4693 - strlen(", callee linkage: 0, 0"); 4694 dbgs() << "TCO caller: " 4695 << left_justify(DAG.getMachineFunction().getName(), Width) 4696 << ", callee linkage: " 4697 << GV->getVisibility() << ", " << GV->getLinkage() << "\n" 4698 ); 4699 } 4700 } 4701 4702 if (!isTailCall && CS && CS->isMustTailCall()) 4703 report_fatal_error("failed to perform tail call elimination on a call " 4704 "site marked musttail"); 4705 4706 if (Subtarget.isSVR4ABI()) { 4707 if (Subtarget.isPPC64()) 4708 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 4709 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4710 dl, DAG, InVals, CS); 4711 else 4712 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 4713 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4714 dl, DAG, InVals, CS); 4715 } 4716 4717 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 4718 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4719 dl, DAG, InVals, CS); 4720 } 4721 4722 SDValue PPCTargetLowering::LowerCall_32SVR4( 4723 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 4724 bool isTailCall, bool isPatchPoint, 4725 const SmallVectorImpl<ISD::OutputArg> &Outs, 4726 const SmallVectorImpl<SDValue> &OutVals, 4727 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 4728 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 4729 ImmutableCallSite *CS) const { 4730 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 4731 // of the 32-bit SVR4 ABI stack frame layout. 
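// Only the C and fast calling conventions are expected here; the assert
// below enforces that.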
4732 4733 assert((CallConv == CallingConv::C || 4734 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 4735 4736 unsigned PtrByteSize = 4; 4737 4738 MachineFunction &MF = DAG.getMachineFunction(); 4739 4740 // Mark this function as potentially containing a function that contains a 4741 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4742 // and restoring the callers stack pointer in this functions epilog. This is 4743 // done because by tail calling the called function might overwrite the value 4744 // in this function's (MF) stack pointer stack slot 0(SP). 4745 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4746 CallConv == CallingConv::Fast) 4747 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4748 4749 // Count how many bytes are to be pushed on the stack, including the linkage 4750 // area, parameter list area and the part of the local variable space which 4751 // contains copies of aggregates which are passed by value. 4752 4753 // Assign locations to all of the outgoing arguments. 4754 SmallVector<CCValAssign, 16> ArgLocs; 4755 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); 4756 4757 // Reserve space for the linkage area on the stack. 4758 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), 4759 PtrByteSize); 4760 if (useSoftFloat()) 4761 CCInfo.PreAnalyzeCallOperands(Outs); 4762 4763 if (isVarArg) { 4764 // Handle fixed and variable vector arguments differently. 4765 // Fixed vector arguments go into registers as long as registers are 4766 // available. Variable vector arguments always go into memory. 4767 unsigned NumArgs = Outs.size(); 4768 4769 for (unsigned i = 0; i != NumArgs; ++i) { 4770 MVT ArgVT = Outs[i].VT; 4771 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4772 bool Result; 4773 4774 if (Outs[i].IsFixed) { 4775 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 4776 CCInfo); 4777 } else { 4778 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 4779 ArgFlags, CCInfo); 4780 } 4781 4782 if (Result) { 4783 #ifndef NDEBUG 4784 errs() << "Call operand #" << i << " has unhandled type " 4785 << EVT(ArgVT).getEVTString() << "\n"; 4786 #endif 4787 llvm_unreachable(nullptr); 4788 } 4789 } 4790 } else { 4791 // All arguments are treated the same. 4792 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 4793 } 4794 CCInfo.clearWasPPCF128(); 4795 4796 // Assign locations to all of the outgoing aggregate by value arguments. 4797 SmallVector<CCValAssign, 16> ByValArgLocs; 4798 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext()); 4799 4800 // Reserve stack space for the allocations in CCInfo. 4801 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 4802 4803 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 4804 4805 // Size of the linkage area, parameter list area and the part of the local 4806 // space variable where copies of aggregates which are passed by value are 4807 // stored. 4808 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 4809 4810 // Calculate by how many bytes the stack has to be adjusted in case of tail 4811 // call optimization. 4812 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4813 4814 // Adjust the stack pointer for the new arguments... 
4815 // These operations are automatically eliminated by the prolog/epilog pass 4816 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4817 dl); 4818 SDValue CallSeqStart = Chain; 4819 4820 // Load the return address and frame pointer so it can be moved somewhere else 4821 // later. 4822 SDValue LROp, FPOp; 4823 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 4824 4825 // Set up a copy of the stack pointer for use loading and storing any 4826 // arguments that may not fit in the registers available for argument 4827 // passing. 4828 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4829 4830 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4831 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4832 SmallVector<SDValue, 8> MemOpChains; 4833 4834 bool seenFloatArg = false; 4835 // Walk the register/memloc assignments, inserting copies/loads. 4836 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 4837 i != e; 4838 ++i) { 4839 CCValAssign &VA = ArgLocs[i]; 4840 SDValue Arg = OutVals[i]; 4841 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4842 4843 if (Flags.isByVal()) { 4844 // Argument is an aggregate which is passed by value, thus we need to 4845 // create a copy of it in the local variable space of the current stack 4846 // frame (which is the stack frame of the caller) and pass the address of 4847 // this copy to the callee. 4848 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 4849 CCValAssign &ByValVA = ByValArgLocs[j++]; 4850 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 4851 4852 // Memory reserved in the local variable space of the callers stack frame. 4853 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 4854 4855 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4856 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4857 StackPtr, PtrOff); 4858 4859 // Create a copy of the argument in the local area of the current 4860 // stack frame. 4861 SDValue MemcpyCall = 4862 CreateCopyOfByValArgument(Arg, PtrOff, 4863 CallSeqStart.getNode()->getOperand(0), 4864 Flags, DAG, dl); 4865 4866 // This must go outside the CALLSEQ_START..END. 4867 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4868 CallSeqStart.getNode()->getOperand(1), 4869 SDLoc(MemcpyCall)); 4870 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4871 NewCallSeqStart.getNode()); 4872 Chain = CallSeqStart = NewCallSeqStart; 4873 4874 // Pass the address of the aggregate copy on the stack either in a 4875 // physical register or in the parameter list area of the current stack 4876 // frame to the callee. 4877 Arg = PtrOff; 4878 } 4879 4880 if (VA.isRegLoc()) { 4881 if (Arg.getValueType() == MVT::i1) 4882 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 4883 4884 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 4885 // Put argument in a physical register. 4886 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 4887 } else { 4888 // Put argument in the parameter list area of the current stack frame. 4889 assert(VA.isMemLoc()); 4890 unsigned LocMemOffset = VA.getLocMemOffset(); 4891 4892 if (!isTailCall) { 4893 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4894 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4895 StackPtr, PtrOff); 4896 4897 MemOpChains.push_back( 4898 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 4899 } else { 4900 // Calculate and remember argument location. 
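// (The actual stores for tail-call arguments are emitted later, in
// PrepareTailCall.)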
4901 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 4902 TailCallArguments); 4903 } 4904 } 4905 } 4906 4907 if (!MemOpChains.empty()) 4908 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4909 4910 // Build a sequence of copy-to-reg nodes chained together with token chain 4911 // and flag operands which copy the outgoing args into the appropriate regs. 4912 SDValue InFlag; 4913 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4914 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4915 RegsToPass[i].second, InFlag); 4916 InFlag = Chain.getValue(1); 4917 } 4918 4919 // Set CR bit 6 to true if this is a vararg call with floating args passed in 4920 // registers. 4921 if (isVarArg) { 4922 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 4923 SDValue Ops[] = { Chain, InFlag }; 4924 4925 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 4926 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); 4927 4928 InFlag = Chain.getValue(1); 4929 } 4930 4931 if (isTailCall) 4932 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 4933 TailCallArguments); 4934 4935 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 4936 /* unused except on PPC64 ELFv1 */ false, DAG, 4937 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 4938 NumBytes, Ins, InVals, CS); 4939 } 4940 4941 // Copy an argument into memory, being careful to do this outside the 4942 // call sequence for the call to which the argument belongs. 4943 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( 4944 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, 4945 SelectionDAG &DAG, const SDLoc &dl) const { 4946 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 4947 CallSeqStart.getNode()->getOperand(0), 4948 Flags, DAG, dl); 4949 // The MEMCPY must go outside the CALLSEQ_START..END. 4950 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4951 CallSeqStart.getNode()->getOperand(1), 4952 SDLoc(MemcpyCall)); 4953 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4954 NewCallSeqStart.getNode()); 4955 return NewCallSeqStart; 4956 } 4957 4958 SDValue PPCTargetLowering::LowerCall_64SVR4( 4959 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 4960 bool isTailCall, bool isPatchPoint, 4961 const SmallVectorImpl<ISD::OutputArg> &Outs, 4962 const SmallVectorImpl<SDValue> &OutVals, 4963 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 4964 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 4965 ImmutableCallSite *CS) const { 4966 4967 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4968 bool isLittleEndian = Subtarget.isLittleEndian(); 4969 unsigned NumOps = Outs.size(); 4970 bool hasNest = false; 4971 bool IsSibCall = false; 4972 4973 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4974 unsigned PtrByteSize = 8; 4975 4976 MachineFunction &MF = DAG.getMachineFunction(); 4977 4978 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) 4979 IsSibCall = true; 4980 4981 // Mark this function as potentially containing a function that contains a 4982 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4983 // and restoring the callers stack pointer in this functions epilog. This is 4984 // done because by tail calling the called function might overwrite the value 4985 // in this function's (MF) stack pointer stack slot 0(SP). 
4986 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4987 CallConv == CallingConv::Fast) 4988 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4989 4990 assert(!(CallConv == CallingConv::Fast && isVarArg) && 4991 "fastcc not supported on varargs functions"); 4992 4993 // Count how many bytes are to be pushed on the stack, including the linkage 4994 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 4995 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 4996 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 4997 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 4998 unsigned NumBytes = LinkageSize; 4999 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5000 unsigned &QFPR_idx = FPR_idx; 5001 5002 static const MCPhysReg GPR[] = { 5003 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5004 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5005 }; 5006 static const MCPhysReg VR[] = { 5007 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5008 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5009 }; 5010 static const MCPhysReg VSRH[] = { 5011 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 5012 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 5013 }; 5014 5015 const unsigned NumGPRs = array_lengthof(GPR); 5016 const unsigned NumFPRs = 13; 5017 const unsigned NumVRs = array_lengthof(VR); 5018 const unsigned NumQFPRs = NumFPRs; 5019 5020 // When using the fast calling convention, we don't provide backing for 5021 // arguments that will be in registers. 5022 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; 5023 5024 // Add up all the space actually used. 5025 for (unsigned i = 0; i != NumOps; ++i) { 5026 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5027 EVT ArgVT = Outs[i].VT; 5028 EVT OrigVT = Outs[i].ArgVT; 5029 5030 if (Flags.isNest()) 5031 continue; 5032 5033 if (CallConv == CallingConv::Fast) { 5034 if (Flags.isByVal()) 5035 NumGPRsUsed += (Flags.getByValSize()+7)/8; 5036 else 5037 switch (ArgVT.getSimpleVT().SimpleTy) { 5038 default: llvm_unreachable("Unexpected ValueType for argument!"); 5039 case MVT::i1: 5040 case MVT::i32: 5041 case MVT::i64: 5042 if (++NumGPRsUsed <= NumGPRs) 5043 continue; 5044 break; 5045 case MVT::v4i32: 5046 case MVT::v8i16: 5047 case MVT::v16i8: 5048 case MVT::v2f64: 5049 case MVT::v2i64: 5050 case MVT::v1i128: 5051 if (++NumVRsUsed <= NumVRs) 5052 continue; 5053 break; 5054 case MVT::v4f32: 5055 // When using QPX, this is handled like a FP register, otherwise, it 5056 // is an Altivec register. 5057 if (Subtarget.hasQPX()) { 5058 if (++NumFPRsUsed <= NumFPRs) 5059 continue; 5060 } else { 5061 if (++NumVRsUsed <= NumVRs) 5062 continue; 5063 } 5064 break; 5065 case MVT::f32: 5066 case MVT::f64: 5067 case MVT::v4f64: // QPX 5068 case MVT::v4i1: // QPX 5069 if (++NumFPRsUsed <= NumFPRs) 5070 continue; 5071 break; 5072 } 5073 } 5074 5075 /* Respect alignment of argument on the stack. */ 5076 unsigned Align = 5077 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5078 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 5079 5080 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5081 if (Flags.isInConsecutiveRegsLast()) 5082 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5083 } 5084 5085 unsigned NumBytesActuallyUsed = NumBytes; 5086 5087 // The prolog code of the callee may store up to 8 GPR argument registers to 5088 // the stack, allowing va_start to index over them in memory if its varargs. 
5089 // Because we cannot tell if this is needed on the caller side, we have to 5090 // conservatively assume that it is needed. As such, make sure we have at 5091 // least enough stack space for the caller to store the 8 GPRs. 5092 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. 5093 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5094 5095 // Tail call needs the stack to be aligned. 5096 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5097 CallConv == CallingConv::Fast) 5098 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5099 5100 int SPDiff = 0; 5101 5102 // Calculate by how many bytes the stack has to be adjusted in case of tail 5103 // call optimization. 5104 if (!IsSibCall) 5105 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5106 5107 // To protect arguments on the stack from being clobbered in a tail call, 5108 // force all the loads to happen before doing any other lowering. 5109 if (isTailCall) 5110 Chain = DAG.getStackArgumentTokenFactor(Chain); 5111 5112 // Adjust the stack pointer for the new arguments... 5113 // These operations are automatically eliminated by the prolog/epilog pass 5114 if (!IsSibCall) 5115 Chain = DAG.getCALLSEQ_START(Chain, 5116 DAG.getIntPtrConstant(NumBytes, dl, true), dl); 5117 SDValue CallSeqStart = Chain; 5118 5119 // Load the return address and frame pointer so it can be move somewhere else 5120 // later. 5121 SDValue LROp, FPOp; 5122 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5123 5124 // Set up a copy of the stack pointer for use loading and storing any 5125 // arguments that may not fit in the registers available for argument 5126 // passing. 5127 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5128 5129 // Figure out which arguments are going to go in registers, and which in 5130 // memory. Also, if this is a vararg function, floating point operations 5131 // must be stored to our stack, and loaded into integer regs as well, if 5132 // any integer regs are available for argument passing. 5133 unsigned ArgOffset = LinkageSize; 5134 5135 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5136 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5137 5138 SmallVector<SDValue, 8> MemOpChains; 5139 for (unsigned i = 0; i != NumOps; ++i) { 5140 SDValue Arg = OutVals[i]; 5141 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5142 EVT ArgVT = Outs[i].VT; 5143 EVT OrigVT = Outs[i].ArgVT; 5144 5145 // PtrOff will be used to store the current argument to the stack if a 5146 // register cannot be found for it. 5147 SDValue PtrOff; 5148 5149 // We re-align the argument offset for each argument, except when using the 5150 // fast calling convention, when we need to make sure we do that only when 5151 // we'll actually use a stack slot. 5152 auto ComputePtrOff = [&]() { 5153 /* Respect alignment of argument on the stack. */ 5154 unsigned Align = 5155 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5156 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 5157 5158 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 5159 5160 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5161 }; 5162 5163 if (CallConv != CallingConv::Fast) { 5164 ComputePtrOff(); 5165 5166 /* Compute GPR index associated with argument offset. */ 5167 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 5168 GPR_idx = std::min(GPR_idx, NumGPRs); 5169 } 5170 5171 // Promote integers to 64-bit values. 
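// (i1 and i32 arguments are widened to i64 so each one fills a full GPR or
// stack doubleword.)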
5172 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 5173 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5174 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5175 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5176 } 5177 5178 // FIXME memcpy is used way more than necessary. Correctness first. 5179 // Note: "by value" is code for passing a structure by value, not 5180 // basic types. 5181 if (Flags.isByVal()) { 5182 // Note: Size includes alignment padding, so 5183 // struct x { short a; char b; } 5184 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 5185 // These are the proper values we need for right-justifying the 5186 // aggregate in a parameter register. 5187 unsigned Size = Flags.getByValSize(); 5188 5189 // An empty aggregate parameter takes up no storage and no 5190 // registers. 5191 if (Size == 0) 5192 continue; 5193 5194 if (CallConv == CallingConv::Fast) 5195 ComputePtrOff(); 5196 5197 // All aggregates smaller than 8 bytes must be passed right-justified. 5198 if (Size==1 || Size==2 || Size==4) { 5199 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); 5200 if (GPR_idx != NumGPRs) { 5201 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5202 MachinePointerInfo(), VT); 5203 MemOpChains.push_back(Load.getValue(1)); 5204 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5205 5206 ArgOffset += PtrByteSize; 5207 continue; 5208 } 5209 } 5210 5211 if (GPR_idx == NumGPRs && Size < 8) { 5212 SDValue AddPtr = PtrOff; 5213 if (!isLittleEndian) { 5214 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5215 PtrOff.getValueType()); 5216 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5217 } 5218 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5219 CallSeqStart, 5220 Flags, DAG, dl); 5221 ArgOffset += PtrByteSize; 5222 continue; 5223 } 5224 // Copy entire object into memory. There are cases where gcc-generated 5225 // code assumes it is there, even if it could be put entirely into 5226 // registers. (This is not what the doc says.) 5227 5228 // FIXME: The above statement is likely due to a misunderstanding of the 5229 // documents. All arguments must be copied into the parameter area BY 5230 // THE CALLEE in the event that the callee takes the address of any 5231 // formal argument. That has not yet been implemented. However, it is 5232 // reasonable to use the stack area as a staging area for the register 5233 // load. 5234 5235 // Skip this for small aggregates, as we will use the same slot for a 5236 // right-justified copy, below. 5237 if (Size >= 8) 5238 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5239 CallSeqStart, 5240 Flags, DAG, dl); 5241 5242 // When a register is available, pass a small aggregate right-justified. 5243 if (Size < 8 && GPR_idx != NumGPRs) { 5244 // The easiest way to get this right-justified in a register 5245 // is to copy the structure into the rightmost portion of a 5246 // local variable slot, then load the whole slot into the 5247 // register. 5248 // FIXME: The memcpy seems to produce pretty awful code for 5249 // small aggregates, particularly for packed ones. 5250 // FIXME: It would be preferable to use the slot in the 5251 // parameter save area instead of a new local variable. 
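// On big-endian targets the copy is placed at offset (8 - Size) within the
// doubleword so the aggregate ends up right-justified.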
5252 SDValue AddPtr = PtrOff; 5253 if (!isLittleEndian) { 5254 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); 5255 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5256 } 5257 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5258 CallSeqStart, 5259 Flags, DAG, dl); 5260 5261 // Load the slot into the register. 5262 SDValue Load = 5263 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); 5264 MemOpChains.push_back(Load.getValue(1)); 5265 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5266 5267 // Done with this argument. 5268 ArgOffset += PtrByteSize; 5269 continue; 5270 } 5271 5272 // For aggregates larger than PtrByteSize, copy the pieces of the 5273 // object that fit into registers from the parameter save area. 5274 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5275 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5276 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5277 if (GPR_idx != NumGPRs) { 5278 SDValue Load = 5279 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 5280 MemOpChains.push_back(Load.getValue(1)); 5281 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5282 ArgOffset += PtrByteSize; 5283 } else { 5284 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5285 break; 5286 } 5287 } 5288 continue; 5289 } 5290 5291 switch (Arg.getSimpleValueType().SimpleTy) { 5292 default: llvm_unreachable("Unexpected ValueType for argument!"); 5293 case MVT::i1: 5294 case MVT::i32: 5295 case MVT::i64: 5296 if (Flags.isNest()) { 5297 // The 'nest' parameter, if any, is passed in R11. 5298 RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); 5299 hasNest = true; 5300 break; 5301 } 5302 5303 // These can be scalar arguments or elements of an integer array type 5304 // passed directly. Clang may use those instead of "byval" aggregate 5305 // types to avoid forcing arguments to memory unnecessarily. 5306 if (GPR_idx != NumGPRs) { 5307 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5308 } else { 5309 if (CallConv == CallingConv::Fast) 5310 ComputePtrOff(); 5311 5312 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5313 true, isTailCall, false, MemOpChains, 5314 TailCallArguments, dl); 5315 if (CallConv == CallingConv::Fast) 5316 ArgOffset += PtrByteSize; 5317 } 5318 if (CallConv != CallingConv::Fast) 5319 ArgOffset += PtrByteSize; 5320 break; 5321 case MVT::f32: 5322 case MVT::f64: { 5323 // These can be scalar arguments or elements of a float array type 5324 // passed directly. The latter are used to implement ELFv2 homogenous 5325 // float aggregates. 5326 5327 // Named arguments go into FPRs first, and once they overflow, the 5328 // remaining arguments go into GPRs and then the parameter save area. 5329 // Unnamed arguments for vararg functions always go to GPRs and 5330 // then the parameter save area. For now, put all arguments to vararg 5331 // routines always in both locations (FPR *and* GPR or stack slot). 5332 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 5333 bool NeededLoad = false; 5334 5335 // First load the argument into the next available FPR. 5336 if (FPR_idx != NumFPRs) 5337 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5338 5339 // Next, load the argument into GPR or stack slot if needed. 
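// NeedGPROrStack is set above for varargs calls and whenever the FPRs have
// been exhausted.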
5340 if (!NeedGPROrStack) 5341 ; 5342 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) { 5343 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 5344 // once we support fp <-> gpr moves. 5345 5346 // In the non-vararg case, this can only ever happen in the 5347 // presence of f32 array types, since otherwise we never run 5348 // out of FPRs before running out of GPRs. 5349 SDValue ArgVal; 5350 5351 // Double values are always passed in a single GPR. 5352 if (Arg.getValueType() != MVT::f32) { 5353 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 5354 5355 // Non-array float values are extended and passed in a GPR. 5356 } else if (!Flags.isInConsecutiveRegs()) { 5357 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5358 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 5359 5360 // If we have an array of floats, we collect every odd element 5361 // together with its predecessor into one GPR. 5362 } else if (ArgOffset % PtrByteSize != 0) { 5363 SDValue Lo, Hi; 5364 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 5365 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5366 if (!isLittleEndian) 5367 std::swap(Lo, Hi); 5368 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 5369 5370 // The final element, if even, goes into the first half of a GPR. 5371 } else if (Flags.isInConsecutiveRegsLast()) { 5372 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5373 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 5374 if (!isLittleEndian) 5375 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, 5376 DAG.getConstant(32, dl, MVT::i32)); 5377 5378 // Non-final even elements are skipped; they will be handled 5379 // together the with subsequent argument on the next go-around. 5380 } else 5381 ArgVal = SDValue(); 5382 5383 if (ArgVal.getNode()) 5384 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); 5385 } else { 5386 if (CallConv == CallingConv::Fast) 5387 ComputePtrOff(); 5388 5389 // Single-precision floating-point values are mapped to the 5390 // second (rightmost) word of the stack doubleword. 5391 if (Arg.getValueType() == MVT::f32 && 5392 !isLittleEndian && !Flags.isInConsecutiveRegs()) { 5393 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 5394 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5395 } 5396 5397 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5398 true, isTailCall, false, MemOpChains, 5399 TailCallArguments, dl); 5400 5401 NeededLoad = true; 5402 } 5403 // When passing an array of floats, the array occupies consecutive 5404 // space in the argument area; only round up to the next doubleword 5405 // at the end of the array. Otherwise, each float takes 8 bytes. 5406 if (CallConv != CallingConv::Fast || NeededLoad) { 5407 ArgOffset += (Arg.getValueType() == MVT::f32 && 5408 Flags.isInConsecutiveRegs()) ? 4 : 8; 5409 if (Flags.isInConsecutiveRegsLast()) 5410 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5411 } 5412 break; 5413 } 5414 case MVT::v4f32: 5415 case MVT::v4i32: 5416 case MVT::v8i16: 5417 case MVT::v16i8: 5418 case MVT::v2f64: 5419 case MVT::v2i64: 5420 case MVT::v1i128: 5421 if (!Subtarget.hasQPX()) { 5422 // These can be scalar arguments or elements of a vector array type 5423 // passed directly. The latter are used to implement ELFv2 homogenous 5424 // vector aggregates. 
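// v2f64 and v2i64 arguments are assigned to the VSRH registers; the other
// vector types here go into the Altivec VRs (see the register selection
// below).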
5425 5426 // For a varargs call, named arguments go into VRs or on the stack as 5427 // usual; unnamed arguments always go to the stack or the corresponding 5428 // GPRs when within range. For now, we always put the value in both 5429 // locations (or even all three). 5430 if (isVarArg) { 5431 // We could elide this store in the case where the object fits 5432 // entirely in R registers. Maybe later. 5433 SDValue Store = 5434 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5435 MemOpChains.push_back(Store); 5436 if (VR_idx != NumVRs) { 5437 SDValue Load = 5438 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 5439 MemOpChains.push_back(Load.getValue(1)); 5440 5441 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 5442 Arg.getSimpleValueType() == MVT::v2i64) ? 5443 VSRH[VR_idx] : VR[VR_idx]; 5444 ++VR_idx; 5445 5446 RegsToPass.push_back(std::make_pair(VReg, Load)); 5447 } 5448 ArgOffset += 16; 5449 for (unsigned i=0; i<16; i+=PtrByteSize) { 5450 if (GPR_idx == NumGPRs) 5451 break; 5452 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5453 DAG.getConstant(i, dl, PtrVT)); 5454 SDValue Load = 5455 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5456 MemOpChains.push_back(Load.getValue(1)); 5457 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5458 } 5459 break; 5460 } 5461 5462 // Non-varargs Altivec params go into VRs or on the stack. 5463 if (VR_idx != NumVRs) { 5464 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 5465 Arg.getSimpleValueType() == MVT::v2i64) ? 5466 VSRH[VR_idx] : VR[VR_idx]; 5467 ++VR_idx; 5468 5469 RegsToPass.push_back(std::make_pair(VReg, Arg)); 5470 } else { 5471 if (CallConv == CallingConv::Fast) 5472 ComputePtrOff(); 5473 5474 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5475 true, isTailCall, true, MemOpChains, 5476 TailCallArguments, dl); 5477 if (CallConv == CallingConv::Fast) 5478 ArgOffset += 16; 5479 } 5480 5481 if (CallConv != CallingConv::Fast) 5482 ArgOffset += 16; 5483 break; 5484 } // not QPX 5485 5486 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && 5487 "Invalid QPX parameter type"); 5488 5489 /* fall through */ 5490 case MVT::v4f64: 5491 case MVT::v4i1: { 5492 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; 5493 if (isVarArg) { 5494 // We could elide this store in the case where the object fits 5495 // entirely in R registers. Maybe later. 5496 SDValue Store = 5497 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5498 MemOpChains.push_back(Store); 5499 if (QFPR_idx != NumQFPRs) { 5500 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store, 5501 PtrOff, MachinePointerInfo()); 5502 MemOpChains.push_back(Load.getValue(1)); 5503 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); 5504 } 5505 ArgOffset += (IsF32 ? 16 : 32); 5506 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) { 5507 if (GPR_idx == NumGPRs) 5508 break; 5509 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5510 DAG.getConstant(i, dl, PtrVT)); 5511 SDValue Load = 5512 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5513 MemOpChains.push_back(Load.getValue(1)); 5514 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5515 } 5516 break; 5517 } 5518 5519 // Non-varargs QPX params go into registers or on the stack. 
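// Note that QFPR_idx is an alias of FPR_idx (declared above), so QPX vector
// arguments and scalar FP arguments share the same register pool.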
5520 if (QFPR_idx != NumQFPRs) { 5521 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); 5522 } else { 5523 if (CallConv == CallingConv::Fast) 5524 ComputePtrOff(); 5525 5526 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5527 true, isTailCall, true, MemOpChains, 5528 TailCallArguments, dl); 5529 if (CallConv == CallingConv::Fast) 5530 ArgOffset += (IsF32 ? 16 : 32); 5531 } 5532 5533 if (CallConv != CallingConv::Fast) 5534 ArgOffset += (IsF32 ? 16 : 32); 5535 break; 5536 } 5537 } 5538 } 5539 5540 assert(NumBytesActuallyUsed == ArgOffset); 5541 (void)NumBytesActuallyUsed; 5542 5543 if (!MemOpChains.empty()) 5544 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5545 5546 // Check if this is an indirect call (MTCTR/BCTRL). 5547 // See PrepareCall() for more information about calls through function 5548 // pointers in the 64-bit SVR4 ABI. 5549 if (!isTailCall && !isPatchPoint && 5550 !isFunctionGlobalAddress(Callee) && 5551 !isa<ExternalSymbolSDNode>(Callee)) { 5552 // Load r2 into a virtual register and store it to the TOC save area. 5553 setUsesTOCBasePtr(DAG); 5554 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 5555 // TOC save area offset. 5556 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 5557 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 5558 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5559 Chain = DAG.getStore( 5560 Val.getValue(1), dl, Val, AddPtr, 5561 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); 5562 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 5563 // This does not mean the MTCTR instruction must use R12; it's easier 5564 // to model this as an extra parameter, so do that. 5565 if (isELFv2ABI && !isPatchPoint) 5566 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 5567 } 5568 5569 // Build a sequence of copy-to-reg nodes chained together with token chain 5570 // and flag operands which copy the outgoing args into the appropriate regs. 5571 SDValue InFlag; 5572 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5573 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5574 RegsToPass[i].second, InFlag); 5575 InFlag = Chain.getValue(1); 5576 } 5577 5578 if (isTailCall && !IsSibCall) 5579 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 5580 TailCallArguments); 5581 5582 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest, 5583 DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, 5584 SPDiff, NumBytes, Ins, InVals, CS); 5585 } 5586 5587 SDValue PPCTargetLowering::LowerCall_Darwin( 5588 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 5589 bool isTailCall, bool isPatchPoint, 5590 const SmallVectorImpl<ISD::OutputArg> &Outs, 5591 const SmallVectorImpl<SDValue> &OutVals, 5592 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5593 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 5594 ImmutableCallSite *CS) const { 5595 5596 unsigned NumOps = Outs.size(); 5597 5598 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5599 bool isPPC64 = PtrVT == MVT::i64; 5600 unsigned PtrByteSize = isPPC64 ? 8 : 4; 5601 5602 MachineFunction &MF = DAG.getMachineFunction(); 5603 5604 // Mark this function as potentially containing a function that contains a 5605 // tail call. As a consequence the frame pointer will be used for dynamicalloc 5606 // and restoring the callers stack pointer in this functions epilog. 
This is 5607 // done because by tail calling the called function might overwrite the value 5608 // in this function's (MF) stack pointer stack slot 0(SP). 5609 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5610 CallConv == CallingConv::Fast) 5611 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5612 5613 // Count how many bytes are to be pushed on the stack, including the linkage 5614 // area, and parameter passing area. We start with 24/48 bytes, which is 5615 // prereserved space for [SP][CR][LR][3 x unused]. 5616 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 5617 unsigned NumBytes = LinkageSize; 5618 5619 // Add up all the space actually used. 5620 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 5621 // they all go in registers, but we must reserve stack space for them for 5622 // possible use by the caller. In varargs or 64-bit calls, parameters are 5623 // assigned stack space in order, with padding so Altivec parameters are 5624 // 16-byte aligned. 5625 unsigned nAltivecParamsAtEnd = 0; 5626 for (unsigned i = 0; i != NumOps; ++i) { 5627 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5628 EVT ArgVT = Outs[i].VT; 5629 // Varargs Altivec parameters are padded to a 16 byte boundary. 5630 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 5631 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 5632 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 5633 if (!isVarArg && !isPPC64) { 5634 // Non-varargs Altivec parameters go after all the non-Altivec 5635 // parameters; handle those later so we know how much padding we need. 5636 nAltivecParamsAtEnd++; 5637 continue; 5638 } 5639 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 5640 NumBytes = ((NumBytes+15)/16)*16; 5641 } 5642 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5643 } 5644 5645 // Allow for Altivec parameters at the end, if needed. 5646 if (nAltivecParamsAtEnd) { 5647 NumBytes = ((NumBytes+15)/16)*16; 5648 NumBytes += 16*nAltivecParamsAtEnd; 5649 } 5650 5651 // The prolog code of the callee may store up to 8 GPR argument registers to 5652 // the stack, allowing va_start to index over them in memory if its varargs. 5653 // Because we cannot tell if this is needed on the caller side, we have to 5654 // conservatively assume that it is needed. As such, make sure we have at 5655 // least enough stack space for the caller to store the 8 GPRs. 5656 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5657 5658 // Tail call needs the stack to be aligned. 5659 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5660 CallConv == CallingConv::Fast) 5661 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5662 5663 // Calculate by how many bytes the stack has to be adjusted in case of tail 5664 // call optimization. 5665 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5666 5667 // To protect arguments on the stack from being clobbered in a tail call, 5668 // force all the loads to happen before doing any other lowering. 5669 if (isTailCall) 5670 Chain = DAG.getStackArgumentTokenFactor(Chain); 5671 5672 // Adjust the stack pointer for the new arguments... 5673 // These operations are automatically eliminated by the prolog/epilog pass 5674 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 5675 dl); 5676 SDValue CallSeqStart = Chain; 5677 5678 // Load the return address and frame pointer so it can be move somewhere else 5679 // later. 
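// (EmitTailCallLoadFPAndRetAddr only performs these loads when SPDiff != 0,
// i.e. when the stack must be resized for a tail call.)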
5680 SDValue LROp, FPOp; 5681 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5682 5683 // Set up a copy of the stack pointer for use loading and storing any 5684 // arguments that may not fit in the registers available for argument 5685 // passing. 5686 SDValue StackPtr; 5687 if (isPPC64) 5688 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5689 else 5690 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 5691 5692 // Figure out which arguments are going to go in registers, and which in 5693 // memory. Also, if this is a vararg function, floating point operations 5694 // must be stored to our stack, and loaded into integer regs as well, if 5695 // any integer regs are available for argument passing. 5696 unsigned ArgOffset = LinkageSize; 5697 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5698 5699 static const MCPhysReg GPR_32[] = { // 32-bit registers. 5700 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 5701 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 5702 }; 5703 static const MCPhysReg GPR_64[] = { // 64-bit registers. 5704 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5705 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5706 }; 5707 static const MCPhysReg VR[] = { 5708 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5709 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5710 }; 5711 const unsigned NumGPRs = array_lengthof(GPR_32); 5712 const unsigned NumFPRs = 13; 5713 const unsigned NumVRs = array_lengthof(VR); 5714 5715 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 5716 5717 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5718 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5719 5720 SmallVector<SDValue, 8> MemOpChains; 5721 for (unsigned i = 0; i != NumOps; ++i) { 5722 SDValue Arg = OutVals[i]; 5723 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5724 5725 // PtrOff will be used to store the current argument to the stack if a 5726 // register cannot be found for it. 5727 SDValue PtrOff; 5728 5729 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 5730 5731 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5732 5733 // On PPC64, promote integers to 64-bit values. 5734 if (isPPC64 && Arg.getValueType() == MVT::i32) { 5735 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5736 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5737 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5738 } 5739 5740 // FIXME memcpy is used way more than necessary. Correctness first. 5741 // Note: "by value" is code for passing a structure by value, not 5742 // basic types. 5743 if (Flags.isByVal()) { 5744 unsigned Size = Flags.getByValSize(); 5745 // Very small objects are passed right-justified. Everything else is 5746 // passed left-justified. 5747 if (Size==1 || Size==2) { 5748 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 5749 if (GPR_idx != NumGPRs) { 5750 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5751 MachinePointerInfo(), VT); 5752 MemOpChains.push_back(Load.getValue(1)); 5753 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5754 5755 ArgOffset += PtrByteSize; 5756 } else { 5757 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5758 PtrOff.getValueType()); 5759 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5760 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5761 CallSeqStart, 5762 Flags, DAG, dl); 5763 ArgOffset += PtrByteSize; 5764 } 5765 continue; 5766 } 5767 // Copy entire object into memory. 
There are cases where gcc-generated 5768 // code assumes it is there, even if it could be put entirely into 5769 // registers. (This is not what the doc says.) 5770 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5771 CallSeqStart, 5772 Flags, DAG, dl); 5773 5774 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 5775 // copy the pieces of the object that fit into registers from the 5776 // parameter save area. 5777 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5778 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5779 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5780 if (GPR_idx != NumGPRs) { 5781 SDValue Load = 5782 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 5783 MemOpChains.push_back(Load.getValue(1)); 5784 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5785 ArgOffset += PtrByteSize; 5786 } else { 5787 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5788 break; 5789 } 5790 } 5791 continue; 5792 } 5793 5794 switch (Arg.getSimpleValueType().SimpleTy) { 5795 default: llvm_unreachable("Unexpected ValueType for argument!"); 5796 case MVT::i1: 5797 case MVT::i32: 5798 case MVT::i64: 5799 if (GPR_idx != NumGPRs) { 5800 if (Arg.getValueType() == MVT::i1) 5801 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 5802 5803 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5804 } else { 5805 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5806 isPPC64, isTailCall, false, MemOpChains, 5807 TailCallArguments, dl); 5808 } 5809 ArgOffset += PtrByteSize; 5810 break; 5811 case MVT::f32: 5812 case MVT::f64: 5813 if (FPR_idx != NumFPRs) { 5814 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5815 5816 if (isVarArg) { 5817 SDValue Store = 5818 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5819 MemOpChains.push_back(Store); 5820 5821 // Float varargs are always shadowed in available integer registers 5822 if (GPR_idx != NumGPRs) { 5823 SDValue Load = 5824 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 5825 MemOpChains.push_back(Load.getValue(1)); 5826 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5827 } 5828 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 5829 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 5830 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5831 SDValue Load = 5832 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 5833 MemOpChains.push_back(Load.getValue(1)); 5834 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5835 } 5836 } else { 5837 // If we have any FPRs remaining, we may also have GPRs remaining. 5838 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 5839 // GPRs. 5840 if (GPR_idx != NumGPRs) 5841 ++GPR_idx; 5842 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 5843 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 5844 ++GPR_idx; 5845 } 5846 } else 5847 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5848 isPPC64, isTailCall, false, MemOpChains, 5849 TailCallArguments, dl); 5850 if (isPPC64) 5851 ArgOffset += 8; 5852 else 5853 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 5854 break; 5855 case MVT::v4f32: 5856 case MVT::v4i32: 5857 case MVT::v8i16: 5858 case MVT::v16i8: 5859 if (isVarArg) { 5860 // These go aligned on the stack, or in the corresponding R registers 5861 // when within range. 
The Darwin PPC ABI doc claims they also go in 5862 // V registers; in fact gcc does this only for arguments that are 5863 // prototyped, not for those that match the ... We do it for all 5864 // arguments, seems to work. 5865 while (ArgOffset % 16 !=0) { 5866 ArgOffset += PtrByteSize; 5867 if (GPR_idx != NumGPRs) 5868 GPR_idx++; 5869 } 5870 // We could elide this store in the case where the object fits 5871 // entirely in R registers. Maybe later. 5872 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 5873 DAG.getConstant(ArgOffset, dl, PtrVT)); 5874 SDValue Store = 5875 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5876 MemOpChains.push_back(Store); 5877 if (VR_idx != NumVRs) { 5878 SDValue Load = 5879 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 5880 MemOpChains.push_back(Load.getValue(1)); 5881 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5882 } 5883 ArgOffset += 16; 5884 for (unsigned i=0; i<16; i+=PtrByteSize) { 5885 if (GPR_idx == NumGPRs) 5886 break; 5887 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5888 DAG.getConstant(i, dl, PtrVT)); 5889 SDValue Load = 5890 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5891 MemOpChains.push_back(Load.getValue(1)); 5892 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5893 } 5894 break; 5895 } 5896 5897 // Non-varargs Altivec params generally go in registers, but have 5898 // stack space allocated at the end. 5899 if (VR_idx != NumVRs) { 5900 // Doesn't have GPR space allocated. 5901 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5902 } else if (nAltivecParamsAtEnd==0) { 5903 // We are emitting Altivec params in order. 5904 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5905 isPPC64, isTailCall, true, MemOpChains, 5906 TailCallArguments, dl); 5907 ArgOffset += 16; 5908 } 5909 break; 5910 } 5911 } 5912 // If all Altivec parameters fit in registers, as they usually do, 5913 // they get stack space following the non-Altivec parameters. We 5914 // don't track this here because nobody below needs it. 5915 // If there are more Altivec parameters than fit in registers emit 5916 // the stores here. 5917 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 5918 unsigned j = 0; 5919 // Offset is aligned; skip 1st 12 params which go in V registers. 5920 ArgOffset = ((ArgOffset+15)/16)*16; 5921 ArgOffset += 12*16; 5922 for (unsigned i = 0; i != NumOps; ++i) { 5923 SDValue Arg = OutVals[i]; 5924 EVT ArgType = Outs[i].VT; 5925 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 5926 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 5927 if (++j > NumVRs) { 5928 SDValue PtrOff; 5929 // We are emitting Altivec params in order. 5930 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5931 isPPC64, isTailCall, true, MemOpChains, 5932 TailCallArguments, dl); 5933 ArgOffset += 16; 5934 } 5935 } 5936 } 5937 } 5938 5939 if (!MemOpChains.empty()) 5940 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5941 5942 // On Darwin, R12 must contain the address of an indirect callee. This does 5943 // not mean the MTCTR instruction must use R12; it's easier to model this as 5944 // an extra parameter, so do that. 5945 if (!isTailCall && 5946 !isFunctionGlobalAddress(Callee) && 5947 !isa<ExternalSymbolSDNode>(Callee) && 5948 !isBLACompatibleAddress(Callee, DAG)) 5949 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? 
PPC::X12 : 5950 PPC::R12), Callee)); 5951 5952 // Build a sequence of copy-to-reg nodes chained together with token chain 5953 // and flag operands which copy the outgoing args into the appropriate regs. 5954 SDValue InFlag; 5955 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5956 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5957 RegsToPass[i].second, InFlag); 5958 InFlag = Chain.getValue(1); 5959 } 5960 5961 if (isTailCall) 5962 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 5963 TailCallArguments); 5964 5965 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 5966 /* unused except on PPC64 ELFv1 */ false, DAG, 5967 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 5968 NumBytes, Ins, InVals, CS); 5969 } 5970 5971 bool 5972 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 5973 MachineFunction &MF, bool isVarArg, 5974 const SmallVectorImpl<ISD::OutputArg> &Outs, 5975 LLVMContext &Context) const { 5976 SmallVector<CCValAssign, 16> RVLocs; 5977 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 5978 return CCInfo.CheckReturn(Outs, RetCC_PPC); 5979 } 5980 5981 SDValue 5982 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 5983 bool isVarArg, 5984 const SmallVectorImpl<ISD::OutputArg> &Outs, 5985 const SmallVectorImpl<SDValue> &OutVals, 5986 const SDLoc &dl, SelectionDAG &DAG) const { 5987 5988 SmallVector<CCValAssign, 16> RVLocs; 5989 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 5990 *DAG.getContext()); 5991 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 5992 5993 SDValue Flag; 5994 SmallVector<SDValue, 4> RetOps(1, Chain); 5995 5996 // Copy the result values into the output registers. 5997 for (unsigned i = 0; i != RVLocs.size(); ++i) { 5998 CCValAssign &VA = RVLocs[i]; 5999 assert(VA.isRegLoc() && "Can only return in registers!"); 6000 6001 SDValue Arg = OutVals[i]; 6002 6003 switch (VA.getLocInfo()) { 6004 default: llvm_unreachable("Unknown loc info!"); 6005 case CCValAssign::Full: break; 6006 case CCValAssign::AExt: 6007 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 6008 break; 6009 case CCValAssign::ZExt: 6010 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 6011 break; 6012 case CCValAssign::SExt: 6013 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 6014 break; 6015 } 6016 6017 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 6018 Flag = Chain.getValue(1); 6019 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 6020 } 6021 6022 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6023 const MCPhysReg *I = 6024 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); 6025 if (I) { 6026 for (; *I; ++I) { 6027 6028 if (PPC::G8RCRegClass.contains(*I)) 6029 RetOps.push_back(DAG.getRegister(*I, MVT::i64)); 6030 else if (PPC::F8RCRegClass.contains(*I)) 6031 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); 6032 else if (PPC::CRRCRegClass.contains(*I)) 6033 RetOps.push_back(DAG.getRegister(*I, MVT::i1)); 6034 else if (PPC::VRRCRegClass.contains(*I)) 6035 RetOps.push_back(DAG.getRegister(*I, MVT::Other)); 6036 else 6037 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 6038 } 6039 } 6040 6041 RetOps[0] = Chain; // Update chain. 6042 6043 // Add the flag if we have it. 
6044 if (Flag.getNode()) 6045 RetOps.push_back(Flag); 6046 6047 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); 6048 } 6049 6050 SDValue 6051 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, 6052 SelectionDAG &DAG) const { 6053 SDLoc dl(Op); 6054 6055 // Get the correct type for integers. 6056 EVT IntVT = Op.getValueType(); 6057 6058 // Get the inputs. 6059 SDValue Chain = Op.getOperand(0); 6060 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 6061 // Build a DYNAREAOFFSET node. 6062 SDValue Ops[2] = {Chain, FPSIdx}; 6063 SDVTList VTs = DAG.getVTList(IntVT); 6064 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); 6065 } 6066 6067 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, 6068 SelectionDAG &DAG) const { 6069 // When we pop the dynamic allocation we need to restore the SP link. 6070 SDLoc dl(Op); 6071 6072 // Get the correct type for pointers. 6073 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6074 6075 // Construct the stack pointer operand. 6076 bool isPPC64 = Subtarget.isPPC64(); 6077 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 6078 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 6079 6080 // Get the operands for the STACKRESTORE. 6081 SDValue Chain = Op.getOperand(0); 6082 SDValue SaveSP = Op.getOperand(1); 6083 6084 // Load the old link SP. 6085 SDValue LoadLinkSP = 6086 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo()); 6087 6088 // Restore the stack pointer. 6089 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 6090 6091 // Store the old link SP. 6092 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo()); 6093 } 6094 6095 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { 6096 MachineFunction &MF = DAG.getMachineFunction(); 6097 bool isPPC64 = Subtarget.isPPC64(); 6098 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6099 6100 // Get the current return address save index. This index is used when the 6101 // saved link register needs to be reloaded, e.g. for tail calls. 6102 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 6103 int RASI = FI->getReturnAddrSaveIndex(); 6104 6105 // If the return address save index hasn't been defined yet. 6106 if (!RASI) { 6107 // Find out the fixed offset of the return address save area. 6108 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset(); 6109 // Allocate the frame index for the return address save area. 6110 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false); 6111 // Save the result. 6112 FI->setReturnAddrSaveIndex(RASI); 6113 } 6114 return DAG.getFrameIndex(RASI, PtrVT); 6115 } 6116 6117 SDValue 6118 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 6119 MachineFunction &MF = DAG.getMachineFunction(); 6120 bool isPPC64 = Subtarget.isPPC64(); 6121 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6122 6123 // Get the current frame pointer save index. The users of this index will be 6124 // primarily DYNALLOC instructions. 6125 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 6126 int FPSI = FI->getFramePointerSaveIndex(); 6127 6128 // If the frame pointer save index hasn't been defined yet. 6129 if (!FPSI) { 6130 // Find out the fixed offset of the frame pointer save area. 6131 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset(); 6132 // Allocate the frame index for the frame pointer save area. 6133 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 6134 // Save the result.
6135 FI->setFramePointerSaveIndex(FPSI); 6136 } 6137 return DAG.getFrameIndex(FPSI, PtrVT); 6138 } 6139 6140 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 6141 SelectionDAG &DAG) const { 6142 // Get the inputs. 6143 SDValue Chain = Op.getOperand(0); 6144 SDValue Size = Op.getOperand(1); 6145 SDLoc dl(Op); 6146 6147 // Get the corect type for pointers. 6148 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6149 // Negate the size. 6150 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 6151 DAG.getConstant(0, dl, PtrVT), Size); 6152 // Construct a node for the frame pointer save index. 6153 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 6154 // Build a DYNALLOC node. 6155 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 6156 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 6157 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); 6158 } 6159 6160 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 6161 SelectionDAG &DAG) const { 6162 SDLoc DL(Op); 6163 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 6164 DAG.getVTList(MVT::i32, MVT::Other), 6165 Op.getOperand(0), Op.getOperand(1)); 6166 } 6167 6168 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 6169 SelectionDAG &DAG) const { 6170 SDLoc DL(Op); 6171 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 6172 Op.getOperand(0), Op.getOperand(1)); 6173 } 6174 6175 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 6176 if (Op.getValueType().isVector()) 6177 return LowerVectorLoad(Op, DAG); 6178 6179 assert(Op.getValueType() == MVT::i1 && 6180 "Custom lowering only for i1 loads"); 6181 6182 // First, load 8 bits into 32 bits, then truncate to 1 bit. 6183 6184 SDLoc dl(Op); 6185 LoadSDNode *LD = cast<LoadSDNode>(Op); 6186 6187 SDValue Chain = LD->getChain(); 6188 SDValue BasePtr = LD->getBasePtr(); 6189 MachineMemOperand *MMO = LD->getMemOperand(); 6190 6191 SDValue NewLD = 6192 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, 6193 BasePtr, MVT::i8, MMO); 6194 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 6195 6196 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 6197 return DAG.getMergeValues(Ops, dl); 6198 } 6199 6200 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 6201 if (Op.getOperand(1).getValueType().isVector()) 6202 return LowerVectorStore(Op, DAG); 6203 6204 assert(Op.getOperand(1).getValueType() == MVT::i1 && 6205 "Custom lowering only for i1 stores"); 6206 6207 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 6208 6209 SDLoc dl(Op); 6210 StoreSDNode *ST = cast<StoreSDNode>(Op); 6211 6212 SDValue Chain = ST->getChain(); 6213 SDValue BasePtr = ST->getBasePtr(); 6214 SDValue Value = ST->getValue(); 6215 MachineMemOperand *MMO = ST->getMemOperand(); 6216 6217 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), 6218 Value); 6219 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 6220 } 6221 6222 // FIXME: Remove this once the ANDI glue bug is fixed: 6223 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 6224 assert(Op.getValueType() == MVT::i1 && 6225 "Custom lowering only for i1 results"); 6226 6227 SDLoc DL(Op); 6228 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 6229 Op.getOperand(0)); 6230 } 6231 6232 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 6233 /// possible. 
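/// The PowerPC fsel instruction computes FRT = (FRA >= 0.0) ? FRC : FRB, so a
/// floating-point select_cc can be expressed as an fsel of LHS-RHS (or of LHS
/// alone when RHS is +0.0), as long as NaNs and infinities may be ignored.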
6234 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 6235 // Not FP? Not a fsel. 6236 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 6237 !Op.getOperand(2).getValueType().isFloatingPoint()) 6238 return Op; 6239 6240 // We might be able to do better than this under some circumstances, but in 6241 // general, fsel-based lowering of select is a finite-math-only optimization. 6242 // For more information, see section F.3 of the 2.06 ISA specification. 6243 if (!DAG.getTarget().Options.NoInfsFPMath || 6244 !DAG.getTarget().Options.NoNaNsFPMath) 6245 return Op; 6246 // TODO: Propagate flags from the select rather than global settings. 6247 SDNodeFlags Flags; 6248 Flags.setNoInfs(true); 6249 Flags.setNoNaNs(true); 6250 6251 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 6252 6253 EVT ResVT = Op.getValueType(); 6254 EVT CmpVT = Op.getOperand(0).getValueType(); 6255 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6256 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 6257 SDLoc dl(Op); 6258 6259 // If the RHS of the comparison is a 0.0, we don't need to do the 6260 // subtraction at all. 6261 SDValue Sel1; 6262 if (isFloatingPointZero(RHS)) 6263 switch (CC) { 6264 default: break; // SETUO etc aren't handled by fsel. 6265 case ISD::SETNE: 6266 std::swap(TV, FV); 6267 case ISD::SETEQ: 6268 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6269 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6270 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6271 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6272 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6273 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6274 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 6275 case ISD::SETULT: 6276 case ISD::SETLT: 6277 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6278 case ISD::SETOGE: 6279 case ISD::SETGE: 6280 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6281 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6282 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6283 case ISD::SETUGT: 6284 case ISD::SETGT: 6285 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6286 case ISD::SETOLE: 6287 case ISD::SETLE: 6288 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6289 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6290 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6291 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 6292 } 6293 6294 SDValue Cmp; 6295 switch (CC) { 6296 default: break; // SETUO etc aren't handled by fsel. 
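// General pattern for the remaining cases: materialize Cmp = LHS - RHS (or
// RHS - LHS) and select on its sign, e.g. LHS >= RHS becomes
// fsel(LHS - RHS, TV, FV).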
6297 case ISD::SETNE: 6298 std::swap(TV, FV); 6299 case ISD::SETEQ: 6300 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6301 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6302 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6303 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6304 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6305 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6306 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6307 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 6308 case ISD::SETULT: 6309 case ISD::SETLT: 6310 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6311 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6312 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6313 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6314 case ISD::SETOGE: 6315 case ISD::SETGE: 6316 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6317 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6318 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6319 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6320 case ISD::SETUGT: 6321 case ISD::SETGT: 6322 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6323 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6324 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6325 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6326 case ISD::SETOLE: 6327 case ISD::SETLE: 6328 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6329 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6330 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6331 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6332 } 6333 return Op; 6334 } 6335 6336 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 6337 SelectionDAG &DAG, 6338 const SDLoc &dl) const { 6339 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6340 SDValue Src = Op.getOperand(0); 6341 if (Src.getValueType() == MVT::f32) 6342 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6343 6344 SDValue Tmp; 6345 switch (Op.getSimpleValueType().SimpleTy) { 6346 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6347 case MVT::i32: 6348 Tmp = DAG.getNode( 6349 Op.getOpcode() == ISD::FP_TO_SINT 6350 ? PPCISD::FCTIWZ 6351 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6352 dl, MVT::f64, Src); 6353 break; 6354 case MVT::i64: 6355 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6356 "i64 FP_TO_UINT is supported only with FPCVT"); 6357 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 6358 PPCISD::FCTIDUZ, 6359 dl, MVT::f64, Src); 6360 break; 6361 } 6362 6363 // Convert the FP value to an int value through memory. 6364 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 6365 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 6366 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 6367 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 6368 MachinePointerInfo MPI = 6369 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); 6370 6371 // Emit a store to the stack slot. 
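// With STFIWX the 32-bit integer word can be stored directly; otherwise the
// whole f64 is stored and the integer half is loaded back below, with a
// 4-byte bias on big-endian targets.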
6372 SDValue Chain; 6373 if (i32Stack) { 6374 MachineFunction &MF = DAG.getMachineFunction(); 6375 MachineMemOperand *MMO = 6376 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 6377 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 6378 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 6379 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 6380 } else 6381 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); 6382 6383 // Result is a load from the stack slot. If loading 4 bytes, make sure to 6384 // add in a bias on big endian. 6385 if (Op.getValueType() == MVT::i32 && !i32Stack) { 6386 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 6387 DAG.getConstant(4, dl, FIPtr.getValueType())); 6388 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); 6389 } 6390 6391 RLI.Chain = Chain; 6392 RLI.Ptr = FIPtr; 6393 RLI.MPI = MPI; 6394 } 6395 6396 /// \brief Custom lowers floating point to integer conversions to use 6397 /// the direct move instructions available in ISA 2.07 to avoid the 6398 /// need for load/store combinations. 6399 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, 6400 SelectionDAG &DAG, 6401 const SDLoc &dl) const { 6402 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6403 SDValue Src = Op.getOperand(0); 6404 6405 if (Src.getValueType() == MVT::f32) 6406 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6407 6408 SDValue Tmp; 6409 switch (Op.getSimpleValueType().SimpleTy) { 6410 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6411 case MVT::i32: 6412 Tmp = DAG.getNode( 6413 Op.getOpcode() == ISD::FP_TO_SINT 6414 ? PPCISD::FCTIWZ 6415 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6416 dl, MVT::f64, Src); 6417 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); 6418 break; 6419 case MVT::i64: 6420 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6421 "i64 FP_TO_UINT is supported only with FPCVT"); 6422 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 6423 PPCISD::FCTIDUZ, 6424 dl, MVT::f64, Src); 6425 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); 6426 break; 6427 } 6428 return Tmp; 6429 } 6430 6431 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 6432 const SDLoc &dl) const { 6433 if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) 6434 return LowerFP_TO_INTDirectMove(Op, DAG, dl); 6435 6436 ReuseLoadInfo RLI; 6437 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6438 6439 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, 6440 RLI.Alignment, 6441 RLI.IsInvariant ? MachineMemOperand::MOInvariant 6442 : MachineMemOperand::MONone, 6443 RLI.AAInfo, RLI.Ranges); 6444 } 6445 6446 // We're trying to insert a regular store, S, and then a load, L. If the 6447 // incoming value, O, is a load, we might just be able to have our load use the 6448 // address used by O. However, we don't know if anything else will store to 6449 // that address before we can load from it. To prevent this situation, we need 6450 // to insert our load, L, into the chain as a peer of O. To do this, we give L 6451 // the same chain operand as O, we create a token factor from the chain results 6452 // of O and L, and we replace all uses of O's chain result with that token 6453 // factor (see spliceIntoChain below for this last part). 
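// For example, if O = (load ptr) has chain C, we emit L = (load ptr) also
// chained to C, build TF = TokenFactor(O's chain result, L's chain result),
// and redirect the former users of O's chain result to TF, so that no
// intervening store can be scheduled between O and L.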
6454 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 6455 ReuseLoadInfo &RLI, 6456 SelectionDAG &DAG, 6457 ISD::LoadExtType ET) const { 6458 SDLoc dl(Op); 6459 if (ET == ISD::NON_EXTLOAD && 6460 (Op.getOpcode() == ISD::FP_TO_UINT || 6461 Op.getOpcode() == ISD::FP_TO_SINT) && 6462 isOperationLegalOrCustom(Op.getOpcode(), 6463 Op.getOperand(0).getValueType())) { 6464 6465 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6466 return true; 6467 } 6468 6469 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 6470 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 6471 LD->isNonTemporal()) 6472 return false; 6473 if (LD->getMemoryVT() != MemVT) 6474 return false; 6475 6476 RLI.Ptr = LD->getBasePtr(); 6477 if (LD->isIndexed() && !LD->getOffset().isUndef()) { 6478 assert(LD->getAddressingMode() == ISD::PRE_INC && 6479 "Non-pre-inc AM on PPC?"); 6480 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 6481 LD->getOffset()); 6482 } 6483 6484 RLI.Chain = LD->getChain(); 6485 RLI.MPI = LD->getPointerInfo(); 6486 RLI.IsInvariant = LD->isInvariant(); 6487 RLI.Alignment = LD->getAlignment(); 6488 RLI.AAInfo = LD->getAAInfo(); 6489 RLI.Ranges = LD->getRanges(); 6490 6491 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 6492 return true; 6493 } 6494 6495 // Given the head of the old chain, ResChain, insert a token factor containing 6496 // it and NewResChain, and make users of ResChain now be users of that token 6497 // factor. 6498 void PPCTargetLowering::spliceIntoChain(SDValue ResChain, 6499 SDValue NewResChain, 6500 SelectionDAG &DAG) const { 6501 if (!ResChain) 6502 return; 6503 6504 SDLoc dl(NewResChain); 6505 6506 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 6507 NewResChain, DAG.getUNDEF(MVT::Other)); 6508 assert(TF.getNode() != NewResChain.getNode() && 6509 "A new TF really is required here"); 6510 6511 DAG.ReplaceAllUsesOfValueWith(ResChain, TF); 6512 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); 6513 } 6514 6515 /// \brief Analyze profitability of direct move 6516 /// prefer float load to int load plus direct move 6517 /// when there is no integer use of int load 6518 static bool directMoveIsProfitable(const SDValue &Op) { 6519 SDNode *Origin = Op.getOperand(0).getNode(); 6520 if (Origin->getOpcode() != ISD::LOAD) 6521 return true; 6522 6523 for (SDNode::use_iterator UI = Origin->use_begin(), 6524 UE = Origin->use_end(); 6525 UI != UE; ++UI) { 6526 6527 // Only look at the users of the loaded value. 6528 if (UI.getUse().get().getResNo() != 0) 6529 continue; 6530 6531 if (UI->getOpcode() != ISD::SINT_TO_FP && 6532 UI->getOpcode() != ISD::UINT_TO_FP) 6533 return true; 6534 } 6535 6536 return false; 6537 } 6538 6539 /// \brief Custom lowers integer to floating point conversions to use 6540 /// the direct move instructions available in ISA 2.07 to avoid the 6541 /// need for load/store combinations. 6542 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, 6543 SelectionDAG &DAG, 6544 const SDLoc &dl) const { 6545 assert((Op.getValueType() == MVT::f32 || 6546 Op.getValueType() == MVT::f64) && 6547 "Invalid floating point type as target of conversion"); 6548 assert(Subtarget.hasFPCVT() && 6549 "Int to FP conversions with direct moves require FPCVT"); 6550 SDValue FP; 6551 SDValue Src = Op.getOperand(0); 6552 bool SinglePrec = Op.getValueType() == MVT::f32; 6553 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; 6554 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; 6555 unsigned ConvOp = Signed ? (SinglePrec ? 
PPCISD::FCFIDS : PPCISD::FCFID) : 6556 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); 6557 6558 if (WordInt) { 6559 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, 6560 dl, MVT::f64, Src); 6561 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6562 } 6563 else { 6564 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); 6565 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6566 } 6567 6568 return FP; 6569 } 6570 6571 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 6572 SelectionDAG &DAG) const { 6573 SDLoc dl(Op); 6574 6575 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { 6576 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) 6577 return SDValue(); 6578 6579 SDValue Value = Op.getOperand(0); 6580 // The values are now known to be -1 (false) or 1 (true). To convert this 6581 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 6582 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 6583 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 6584 6585 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 6586 6587 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 6588 6589 if (Op.getValueType() != MVT::v4f64) 6590 Value = DAG.getNode(ISD::FP_ROUND, dl, 6591 Op.getValueType(), Value, 6592 DAG.getIntPtrConstant(1, dl)); 6593 return Value; 6594 } 6595 6596 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 6597 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 6598 return SDValue(); 6599 6600 if (Op.getOperand(0).getValueType() == MVT::i1) 6601 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 6602 DAG.getConstantFP(1.0, dl, Op.getValueType()), 6603 DAG.getConstantFP(0.0, dl, Op.getValueType())); 6604 6605 // If we have direct moves, we can do all the conversion, skip the store/load 6606 // however, without FPCVT we can't do most conversions. 6607 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && 6608 Subtarget.isPPC64() && Subtarget.hasFPCVT()) 6609 return LowerINT_TO_FPDirectMove(Op, DAG, dl); 6610 6611 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 6612 "UINT_TO_FP is supported only with FPCVT"); 6613 6614 // If we have FCFIDS, then use it when converting to single-precision. 6615 // Otherwise, convert to double-precision and then round. 6616 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6617 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 6618 : PPCISD::FCFIDS) 6619 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 6620 : PPCISD::FCFID); 6621 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6622 ? MVT::f32 6623 : MVT::f64; 6624 6625 if (Op.getOperand(0).getValueType() == MVT::i64) { 6626 SDValue SINT = Op.getOperand(0); 6627 // When converting to single-precision, we actually need to convert 6628 // to double-precision first and then round to single-precision. 6629 // To avoid double-rounding effects during that operation, we have 6630 // to prepare the input operand. Bits that might be truncated when 6631 // converting to double-precision are replaced by a bit that won't 6632 // be lost at this stage, but is below the single-precision rounding 6633 // position. 6634 // 6635 // However, if -enable-unsafe-fp-math is in effect, accept double 6636 // rounding to avoid the extra overhead. 
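// (Double-rounding example: for the i64 input 2^60 + 2^36 + 1, rounding
// through f64 first yields 2^60, while a single rounding straight to f32
// yields 2^60 + 2^37; the bit twiddling below prevents such mismatches.)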
6637 if (Op.getValueType() == MVT::f32 && 6638 !Subtarget.hasFPCVT() && 6639 !DAG.getTarget().Options.UnsafeFPMath) { 6640 6641 // Twiddle input to make sure the low 11 bits are zero. (If this 6642 // is the case, we are guaranteed the value will fit into the 53 bit 6643 // mantissa of an IEEE double-precision value without rounding.) 6644 // If any of those low 11 bits were not zero originally, make sure 6645 // bit 12 (value 2048) is set instead, so that the final rounding 6646 // to single-precision gets the correct result. 6647 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6648 SINT, DAG.getConstant(2047, dl, MVT::i64)); 6649 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 6650 Round, DAG.getConstant(2047, dl, MVT::i64)); 6651 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 6652 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6653 Round, DAG.getConstant(-2048, dl, MVT::i64)); 6654 6655 // However, we cannot use that value unconditionally: if the magnitude 6656 // of the input value is small, the bit-twiddling we did above might 6657 // end up visibly changing the output. Fortunately, in that case, we 6658 // don't need to twiddle bits since the original input will convert 6659 // exactly to double-precision floating-point already. Therefore, 6660 // construct a conditional to use the original value if the top 11 6661 // bits are all sign-bit copies, and use the rounded value computed 6662 // above otherwise. 6663 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 6664 SINT, DAG.getConstant(53, dl, MVT::i32)); 6665 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 6666 Cond, DAG.getConstant(1, dl, MVT::i64)); 6667 Cond = DAG.getSetCC(dl, MVT::i32, 6668 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); 6669 6670 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 6671 } 6672 6673 ReuseLoadInfo RLI; 6674 SDValue Bits; 6675 6676 MachineFunction &MF = DAG.getMachineFunction(); 6677 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 6678 Bits = 6679 DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment, 6680 RLI.IsInvariant ? 
MachineMemOperand::MOInvariant 6681 : MachineMemOperand::MONone, 6682 RLI.AAInfo, RLI.Ranges); 6683 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6684 } else if (Subtarget.hasLFIWAX() && 6685 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 6686 MachineMemOperand *MMO = 6687 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6688 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6689 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6690 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 6691 DAG.getVTList(MVT::f64, MVT::Other), 6692 Ops, MVT::i32, MMO); 6693 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6694 } else if (Subtarget.hasFPCVT() && 6695 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 6696 MachineMemOperand *MMO = 6697 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6698 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6699 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6700 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 6701 DAG.getVTList(MVT::f64, MVT::Other), 6702 Ops, MVT::i32, MMO); 6703 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6704 } else if (((Subtarget.hasLFIWAX() && 6705 SINT.getOpcode() == ISD::SIGN_EXTEND) || 6706 (Subtarget.hasFPCVT() && 6707 SINT.getOpcode() == ISD::ZERO_EXTEND)) && 6708 SINT.getOperand(0).getValueType() == MVT::i32) { 6709 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 6710 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6711 6712 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 6713 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6714 6715 SDValue Store = 6716 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 6717 MachinePointerInfo::getFixedStack( 6718 DAG.getMachineFunction(), FrameIdx)); 6719 6720 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6721 "Expected an i32 store"); 6722 6723 RLI.Ptr = FIdx; 6724 RLI.Chain = Store; 6725 RLI.MPI = 6726 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6727 RLI.Alignment = 4; 6728 6729 MachineMemOperand *MMO = 6730 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6731 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6732 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6733 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 6734 PPCISD::LFIWZX : PPCISD::LFIWAX, 6735 dl, DAG.getVTList(MVT::f64, MVT::Other), 6736 Ops, MVT::i32, MMO); 6737 } else 6738 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 6739 6740 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 6741 6742 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6743 FP = DAG.getNode(ISD::FP_ROUND, dl, 6744 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 6745 return FP; 6746 } 6747 6748 assert(Op.getOperand(0).getValueType() == MVT::i32 && 6749 "Unhandled INT_TO_FP type in custom expander!"); 6750 // Since we only generate this in 64-bit mode, we can take advantage of 6751 // 64-bit registers. In particular, sign extend the input value into the 6752 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 6753 // then lfd it and fcfid it. 
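// Roughly: extsw rT, rS ; std rT, disp(r1) ; lfd fT, disp(r1) ; fcfid fD, fT
// (followed by frsp when producing f32 without FPCVT). With LFIWAX/LFIWZX the
// 32-bit word can instead be loaded directly into an FPR; that path is
// handled first below.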
6754 MachineFunction &MF = DAG.getMachineFunction(); 6755 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 6756 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6757 6758 SDValue Ld; 6759 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 6760 ReuseLoadInfo RLI; 6761 bool ReusingLoad; 6762 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, 6763 DAG))) { 6764 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 6765 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6766 6767 SDValue Store = 6768 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 6769 MachinePointerInfo::getFixedStack( 6770 DAG.getMachineFunction(), FrameIdx)); 6771 6772 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6773 "Expected an i32 store"); 6774 6775 RLI.Ptr = FIdx; 6776 RLI.Chain = Store; 6777 RLI.MPI = 6778 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6779 RLI.Alignment = 4; 6780 } 6781 6782 MachineMemOperand *MMO = 6783 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6784 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6785 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6786 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 6787 PPCISD::LFIWZX : PPCISD::LFIWAX, 6788 dl, DAG.getVTList(MVT::f64, MVT::Other), 6789 Ops, MVT::i32, MMO); 6790 if (ReusingLoad) 6791 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); 6792 } else { 6793 assert(Subtarget.isPPC64() && 6794 "i32->FP without LFIWAX supported only on PPC64"); 6795 6796 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 6797 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6798 6799 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 6800 Op.getOperand(0)); 6801 6802 // STD the extended value into the stack slot. 6803 SDValue Store = DAG.getStore( 6804 DAG.getEntryNode(), dl, Ext64, FIdx, 6805 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 6806 6807 // Load the value as a double. 6808 Ld = DAG.getLoad( 6809 MVT::f64, dl, Store, FIdx, 6810 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 6811 } 6812 6813 // FCFID it and return it. 
6814 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 6815 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6816 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, 6817 DAG.getIntPtrConstant(0, dl)); 6818 return FP; 6819 } 6820 6821 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 6822 SelectionDAG &DAG) const { 6823 SDLoc dl(Op); 6824 /* 6825 The rounding mode is in bits 30:31 of FPSR, and has the following 6826 settings: 6827 00 Round to nearest 6828 01 Round to 0 6829 10 Round to +inf 6830 11 Round to -inf 6831 6832 FLT_ROUNDS, on the other hand, expects the following: 6833 -1 Undefined 6834 0 Round to 0 6835 1 Round to nearest 6836 2 Round to +inf 6837 3 Round to -inf 6838 6839 To perform the conversion, we do: 6840 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 6841 */ 6842 6843 MachineFunction &MF = DAG.getMachineFunction(); 6844 EVT VT = Op.getValueType(); 6845 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6846 6847 // Save FP Control Word to register 6848 EVT NodeTys[] = { 6849 MVT::f64, // return register 6850 MVT::Glue // unused in this context 6851 }; 6852 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); 6853 6854 // Save FP register to stack slot 6855 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); 6856 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 6857 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, 6858 MachinePointerInfo()); 6859 6860 // Load FP Control Word from low 32 bits of stack slot. 6861 SDValue Four = DAG.getConstant(4, dl, PtrVT); 6862 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 6863 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); 6864 6865 // Transform as necessary 6866 SDValue CWD1 = 6867 DAG.getNode(ISD::AND, dl, MVT::i32, 6868 CWD, DAG.getConstant(3, dl, MVT::i32)); 6869 SDValue CWD2 = 6870 DAG.getNode(ISD::SRL, dl, MVT::i32, 6871 DAG.getNode(ISD::AND, dl, MVT::i32, 6872 DAG.getNode(ISD::XOR, dl, MVT::i32, 6873 CWD, DAG.getConstant(3, dl, MVT::i32)), 6874 DAG.getConstant(3, dl, MVT::i32)), 6875 DAG.getConstant(1, dl, MVT::i32)); 6876 6877 SDValue RetVal = 6878 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 6879 6880 return DAG.getNode((VT.getSizeInBits() < 16 ? 6881 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 6882 } 6883 6884 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 6885 EVT VT = Op.getValueType(); 6886 unsigned BitWidth = VT.getSizeInBits(); 6887 SDLoc dl(Op); 6888 assert(Op.getNumOperands() == 3 && 6889 VT == Op.getOperand(1).getValueType() && 6890 "Unexpected SHL!"); 6891 6892 // Expand into a bunch of logical ops. Note that these ops 6893 // depend on the PPC behavior for oversized shift amounts. 
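// Concretely, relying on the PPC behavior that a shift by BitWidth or more
// (up to 2*BitWidth-1) produces zero:
//   OutLo = Lo << Amt
//   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))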
6894 SDValue Lo = Op.getOperand(0); 6895 SDValue Hi = Op.getOperand(1); 6896 SDValue Amt = Op.getOperand(2); 6897 EVT AmtVT = Amt.getValueType(); 6898 6899 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6900 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6901 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 6902 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 6903 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 6904 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6905 DAG.getConstant(-BitWidth, dl, AmtVT)); 6906 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 6907 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 6908 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 6909 SDValue OutOps[] = { OutLo, OutHi }; 6910 return DAG.getMergeValues(OutOps, dl); 6911 } 6912 6913 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 6914 EVT VT = Op.getValueType(); 6915 SDLoc dl(Op); 6916 unsigned BitWidth = VT.getSizeInBits(); 6917 assert(Op.getNumOperands() == 3 && 6918 VT == Op.getOperand(1).getValueType() && 6919 "Unexpected SRL!"); 6920 6921 // Expand into a bunch of logical ops. Note that these ops 6922 // depend on the PPC behavior for oversized shift amounts. 6923 SDValue Lo = Op.getOperand(0); 6924 SDValue Hi = Op.getOperand(1); 6925 SDValue Amt = Op.getOperand(2); 6926 EVT AmtVT = Amt.getValueType(); 6927 6928 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6929 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6930 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 6931 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 6932 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 6933 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6934 DAG.getConstant(-BitWidth, dl, AmtVT)); 6935 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 6936 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 6937 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 6938 SDValue OutOps[] = { OutLo, OutHi }; 6939 return DAG.getMergeValues(OutOps, dl); 6940 } 6941 6942 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 6943 SDLoc dl(Op); 6944 EVT VT = Op.getValueType(); 6945 unsigned BitWidth = VT.getSizeInBits(); 6946 assert(Op.getNumOperands() == 3 && 6947 VT == Op.getOperand(1).getValueType() && 6948 "Unexpected SRA!"); 6949 6950 // Expand into a bunch of logical ops, followed by a select_cc. 6951 SDValue Lo = Op.getOperand(0); 6952 SDValue Hi = Op.getOperand(1); 6953 SDValue Amt = Op.getOperand(2); 6954 EVT AmtVT = Amt.getValueType(); 6955 6956 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6957 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6958 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 6959 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 6960 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 6961 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6962 DAG.getConstant(-BitWidth, dl, AmtVT)); 6963 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 6964 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 6965 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), 6966 Tmp4, Tmp6, ISD::SETLE); 6967 SDValue OutOps[] = { OutLo, OutHi }; 6968 return DAG.getMergeValues(OutOps, dl); 6969 } 6970 6971 //===----------------------------------------------------------------------===// 6972 // Vector related lowering. 
6973 // 6974 6975 /// BuildSplatI - Build a canonical splati of Val with an element size of 6976 /// SplatSize. Cast the result to VT. 6977 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 6978 SelectionDAG &DAG, const SDLoc &dl) { 6979 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 6980 6981 static const MVT VTys[] = { // canonical VT to use for each size. 6982 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 6983 }; 6984 6985 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 6986 6987 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 6988 if (Val == -1) 6989 SplatSize = 1; 6990 6991 EVT CanonicalVT = VTys[SplatSize-1]; 6992 6993 // Build a canonical splat for this value. 6994 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); 6995 } 6996 6997 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 6998 /// specified intrinsic ID. 6999 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, 7000 const SDLoc &dl, EVT DestVT = MVT::Other) { 7001 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 7002 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7003 DAG.getConstant(IID, dl, MVT::i32), Op); 7004 } 7005 7006 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 7007 /// specified intrinsic ID. 7008 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 7009 SelectionDAG &DAG, const SDLoc &dl, 7010 EVT DestVT = MVT::Other) { 7011 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 7012 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7013 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); 7014 } 7015 7016 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 7017 /// specified intrinsic ID. 7018 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 7019 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, 7020 EVT DestVT = MVT::Other) { 7021 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 7022 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7023 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); 7024 } 7025 7026 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 7027 /// amount. The result has the specified value type. 7028 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, 7029 SelectionDAG &DAG, const SDLoc &dl) { 7030 // Force LHS/RHS to be the right type. 7031 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 7032 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 7033 7034 int Ops[16]; 7035 for (unsigned i = 0; i != 16; ++i) 7036 Ops[i] = i + Amt; 7037 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 7038 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7039 } 7040 7041 // If this is a case we can't handle, return null and let the default 7042 // expansion code take care of it. If we CAN select this case, and if it 7043 // selects to a single instruction, return Op. Otherwise, if we can codegen 7044 // this case more efficiently than a constant pool load, lower it to the 7045 // sequence of ops that should be used. 
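// For example, a splat of the constant 5 across v8i16 can be a single
// vspltish 5, and a splat of 0x8000_0000 across v4i32 can be built from
// vspltisw -1 followed by vslw, which is usually cheaper than a constant
// pool load.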
7046 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 7047 SelectionDAG &DAG) const { 7048 SDLoc dl(Op); 7049 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 7050 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 7051 7052 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { 7053 // We first build an i32 vector, load it into a QPX register, 7054 // then convert it to a floating-point vector and compare it 7055 // to a zero vector to get the boolean result. 7056 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 7057 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 7058 MachinePointerInfo PtrInfo = 7059 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7060 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7061 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7062 7063 assert(BVN->getNumOperands() == 4 && 7064 "BUILD_VECTOR for v4i1 does not have 4 operands"); 7065 7066 bool IsConst = true; 7067 for (unsigned i = 0; i < 4; ++i) { 7068 if (BVN->getOperand(i).isUndef()) continue; 7069 if (!isa<ConstantSDNode>(BVN->getOperand(i))) { 7070 IsConst = false; 7071 break; 7072 } 7073 } 7074 7075 if (IsConst) { 7076 Constant *One = 7077 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); 7078 Constant *NegOne = 7079 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); 7080 7081 Constant *CV[4]; 7082 for (unsigned i = 0; i < 4; ++i) { 7083 if (BVN->getOperand(i).isUndef()) 7084 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); 7085 else if (isNullConstant(BVN->getOperand(i))) 7086 CV[i] = NegOne; 7087 else 7088 CV[i] = One; 7089 } 7090 7091 Constant *CP = ConstantVector::get(CV); 7092 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), 7093 16 /* alignment */); 7094 7095 SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; 7096 SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other}); 7097 return DAG.getMemIntrinsicNode( 7098 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, 7099 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 7100 } 7101 7102 SmallVector<SDValue, 4> Stores; 7103 for (unsigned i = 0; i < 4; ++i) { 7104 if (BVN->getOperand(i).isUndef()) continue; 7105 7106 unsigned Offset = 4*i; 7107 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 7108 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 7109 7110 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); 7111 if (StoreSize > 4) { 7112 Stores.push_back( 7113 DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx, 7114 PtrInfo.getWithOffset(Offset), MVT::i32)); 7115 } else { 7116 SDValue StoreValue = BVN->getOperand(i); 7117 if (StoreSize < 4) 7118 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); 7119 7120 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx, 7121 PtrInfo.getWithOffset(Offset))); 7122 } 7123 } 7124 7125 SDValue StoreChain; 7126 if (!Stores.empty()) 7127 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 7128 else 7129 StoreChain = DAG.getEntryNode(); 7130 7131 // Now load from v4i32 into the QPX register; this will extend it to 7132 // v4i64 but not yet convert it to a floating point. Nevertheless, this 7133 // is typed as v4f64 because the QPX register integer states are not 7134 // explicitly represented. 
7135 7136 SDValue Ops[] = {StoreChain, 7137 DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32), 7138 FIdx}; 7139 SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other}); 7140 7141 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, 7142 dl, VTs, Ops, MVT::v4i32, PtrInfo); 7143 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7144 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), 7145 LoadedVect); 7146 7147 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); 7148 7149 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); 7150 } 7151 7152 // All other QPX vectors are handled by generic code. 7153 if (Subtarget.hasQPX()) 7154 return SDValue(); 7155 7156 // Check if this is a splat of a constant value. 7157 APInt APSplatBits, APSplatUndef; 7158 unsigned SplatBitSize; 7159 bool HasAnyUndefs; 7160 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 7161 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || 7162 SplatBitSize > 32) 7163 return SDValue(); 7164 7165 unsigned SplatBits = APSplatBits.getZExtValue(); 7166 unsigned SplatUndef = APSplatUndef.getZExtValue(); 7167 unsigned SplatSize = SplatBitSize / 8; 7168 7169 // First, handle single instruction cases. 7170 7171 // All zeros? 7172 if (SplatBits == 0) { 7173 // Canonicalize all zero vectors to be v4i32. 7174 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 7175 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); 7176 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 7177 } 7178 return Op; 7179 } 7180 7181 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 7182 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 7183 (32-SplatBitSize)); 7184 if (SextVal >= -16 && SextVal <= 15) 7185 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 7186 7187 // Two instruction sequences. 7188 7189 // If this value is in the range [-32,30] and is even, use: 7190 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 7191 // If this value is in the range [17,31] and is odd, use: 7192 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 7193 // If this value is in the range [-31,-17] and is odd, use: 7194 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 7195 // Note the last two are three-instruction sequences. 7196 if (SextVal >= -32 && SextVal <= 31) { 7197 // To avoid having these optimizations undone by constant folding, 7198 // we convert to a pseudo that will be expanded later into one of 7199 // the above forms. 7200 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); 7201 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 7202 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); 7203 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); 7204 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 7205 if (VT == Op.getValueType()) 7206 return RetVal; 7207 else 7208 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 7209 } 7210 7211 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 7212 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 7213 // for fneg/fabs. 7214 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 7215 // Make -1 and vspltisw -1: 7216 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 7217 7218 // Make the VSLW intrinsic, computing 0x8000_0000. 7219 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 7220 OnesV, DAG, dl); 7221 7222 // xor by OnesV to invert it. 
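// (0xFFFF_FFFF << 31 == 0x8000_0000, and 0x8000_0000 ^ 0xFFFF_FFFF ==
// 0x7FFF_FFFF.)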
7223 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
7224 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7225 }
7226
7227 // Check to see if this matches one of the vsplti* + binop-with-self cases.
7228 static const signed char SplatCsts[] = {
7229 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
7230 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
7231 };
7232
7233 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
7234 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
7235 // cases which are ambiguous (e.g. formation of 0x8000_0000).
7236 int i = SplatCsts[idx];
7237
7238 // Figure out what shift amount will be used by altivec if shifted by i in
7239 // this splat size.
7240 unsigned TypeShiftAmt = i & (SplatBitSize-1);
7241
7242 // vsplti + shl self.
7243 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
7244 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7245 static const unsigned IIDs[] = { // Intrinsic to use for each size.
7246 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
7247 Intrinsic::ppc_altivec_vslw
7248 };
7249 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7250 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7251 }
7252
7253 // vsplti + srl self.
7254 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7255 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7256 static const unsigned IIDs[] = { // Intrinsic to use for each size.
7257 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
7258 Intrinsic::ppc_altivec_vsrw
7259 };
7260 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7261 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7262 }
7263
7264 // vsplti + sra self.
7265 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7266 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7267 static const unsigned IIDs[] = { // Intrinsic to use for each size.
7268 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
7269 Intrinsic::ppc_altivec_vsraw
7270 };
7271 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7272 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7273 }
7274
7275 // vsplti + rol self.
7276 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7277 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
7278 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7279 static const unsigned IIDs[] = { // Intrinsic to use for each size.
7280 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
7281 Intrinsic::ppc_altivec_vrlw
7282 };
7283 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7284 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7285 }
7286
7287 // t = vsplti c, result = vsldoi t, t, 1
7288 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7289 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7290 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7291 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7292 }
7293 // t = vsplti c, result = vsldoi t, t, 2
7294 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7295 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7296 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7297 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7298 }
7299 // t = vsplti c, result = vsldoi t, t, 3
7300 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ?
0xFFFFFF : 0))) { 7301 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7302 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; 7303 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7304 } 7305 } 7306 7307 return SDValue(); 7308 } 7309 7310 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 7311 /// the specified operations to build the shuffle. 7312 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 7313 SDValue RHS, SelectionDAG &DAG, 7314 const SDLoc &dl) { 7315 unsigned OpNum = (PFEntry >> 26) & 0x0F; 7316 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 7317 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 7318 7319 enum { 7320 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 7321 OP_VMRGHW, 7322 OP_VMRGLW, 7323 OP_VSPLTISW0, 7324 OP_VSPLTISW1, 7325 OP_VSPLTISW2, 7326 OP_VSPLTISW3, 7327 OP_VSLDOI4, 7328 OP_VSLDOI8, 7329 OP_VSLDOI12 7330 }; 7331 7332 if (OpNum == OP_COPY) { 7333 if (LHSID == (1*9+2)*9+3) return LHS; 7334 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 7335 return RHS; 7336 } 7337 7338 SDValue OpLHS, OpRHS; 7339 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 7340 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 7341 7342 int ShufIdxs[16]; 7343 switch (OpNum) { 7344 default: llvm_unreachable("Unknown i32 permute!"); 7345 case OP_VMRGHW: 7346 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 7347 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 7348 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 7349 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 7350 break; 7351 case OP_VMRGLW: 7352 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 7353 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 7354 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 7355 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 7356 break; 7357 case OP_VSPLTISW0: 7358 for (unsigned i = 0; i != 16; ++i) 7359 ShufIdxs[i] = (i&3)+0; 7360 break; 7361 case OP_VSPLTISW1: 7362 for (unsigned i = 0; i != 16; ++i) 7363 ShufIdxs[i] = (i&3)+4; 7364 break; 7365 case OP_VSPLTISW2: 7366 for (unsigned i = 0; i != 16; ++i) 7367 ShufIdxs[i] = (i&3)+8; 7368 break; 7369 case OP_VSPLTISW3: 7370 for (unsigned i = 0; i != 16; ++i) 7371 ShufIdxs[i] = (i&3)+12; 7372 break; 7373 case OP_VSLDOI4: 7374 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 7375 case OP_VSLDOI8: 7376 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 7377 case OP_VSLDOI12: 7378 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 7379 } 7380 EVT VT = OpLHS.getValueType(); 7381 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 7382 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 7383 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 7384 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7385 } 7386 7387 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 7388 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 7389 /// return the code it can be lowered into. Worst case, it can always be 7390 /// lowered into a vperm. 
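/// Note that the perfect-shuffle expansion below is only consulted for
/// big-endian shuffles of four 4-byte elements; anything not matched by a
/// single instruction otherwise falls back to the vperm expansion.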
7391 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 7392 SelectionDAG &DAG) const { 7393 SDLoc dl(Op); 7394 SDValue V1 = Op.getOperand(0); 7395 SDValue V2 = Op.getOperand(1); 7396 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 7397 EVT VT = Op.getValueType(); 7398 bool isLittleEndian = Subtarget.isLittleEndian(); 7399 7400 unsigned ShiftElts, InsertAtByte; 7401 bool Swap; 7402 if (Subtarget.hasP9Vector() && 7403 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, 7404 isLittleEndian)) { 7405 if (Swap) 7406 std::swap(V1, V2); 7407 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 7408 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); 7409 if (ShiftElts) { 7410 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, 7411 DAG.getConstant(ShiftElts, dl, MVT::i32)); 7412 SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl, 7413 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 7414 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 7415 } 7416 SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2, 7417 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 7418 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 7419 } 7420 7421 if (Subtarget.hasVSX()) { 7422 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { 7423 int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); 7424 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 7425 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, 7426 DAG.getConstant(SplatIdx, dl, MVT::i32)); 7427 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); 7428 } 7429 7430 // Left shifts of 8 bytes are actually swaps. Convert accordingly. 7431 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { 7432 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); 7433 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); 7434 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); 7435 } 7436 7437 } 7438 7439 if (Subtarget.hasQPX()) { 7440 if (VT.getVectorNumElements() != 4) 7441 return SDValue(); 7442 7443 if (V2.isUndef()) V2 = V1; 7444 7445 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); 7446 if (AlignIdx != -1) { 7447 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, 7448 DAG.getConstant(AlignIdx, dl, MVT::i32)); 7449 } else if (SVOp->isSplat()) { 7450 int SplatIdx = SVOp->getSplatIndex(); 7451 if (SplatIdx >= 4) { 7452 std::swap(V1, V2); 7453 SplatIdx -= 4; 7454 } 7455 7456 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, 7457 DAG.getConstant(SplatIdx, dl, MVT::i32)); 7458 } 7459 7460 // Lower this into a qvgpci/qvfperm pair. 7461 7462 // Compute the qvgpci literal 7463 unsigned idx = 0; 7464 for (unsigned i = 0; i < 4; ++i) { 7465 int m = SVOp->getMaskElt(i); 7466 unsigned mm = m >= 0 ? (unsigned) m : i; 7467 idx |= mm << (3-i)*3; 7468 } 7469 7470 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, 7471 DAG.getConstant(idx, dl, MVT::i32)); 7472 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); 7473 } 7474 7475 // Cases that are handled by instructions that take permute immediates 7476 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 7477 // selected by the instruction selector. 
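  // The single-input checks below (V2 undef) use ShuffleKind 1; the two-input
  // checks that follow use ShuffleKind 2 on little-endian targets and 0 on
  // big-endian targets.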
7478 if (V2.isUndef()) { 7479 if (PPC::isSplatShuffleMask(SVOp, 1) || 7480 PPC::isSplatShuffleMask(SVOp, 2) || 7481 PPC::isSplatShuffleMask(SVOp, 4) || 7482 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 7483 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 7484 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 7485 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 7486 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 7487 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 7488 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 7489 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 7490 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || 7491 (Subtarget.hasP8Altivec() && ( 7492 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || 7493 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || 7494 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { 7495 return Op; 7496 } 7497 } 7498 7499 // Altivec has a variety of "shuffle immediates" that take two vector inputs 7500 // and produce a fixed permutation. If any of these match, do not lower to 7501 // VPERM. 7502 unsigned int ShuffleKind = isLittleEndian ? 2 : 0; 7503 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 7504 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 7505 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 7506 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7507 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7508 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7509 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7510 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7511 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7512 (Subtarget.hasP8Altivec() && ( 7513 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || 7514 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || 7515 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) 7516 return Op; 7517 7518 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 7519 // perfect shuffle table to emit an optimal matching sequence. 7520 ArrayRef<int> PermMask = SVOp->getMask(); 7521 7522 unsigned PFIndexes[4]; 7523 bool isFourElementShuffle = true; 7524 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 7525 unsigned EltNo = 8; // Start out undef. 7526 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 7527 if (PermMask[i*4+j] < 0) 7528 continue; // Undef, ignore it. 7529 7530 unsigned ByteSource = PermMask[i*4+j]; 7531 if ((ByteSource & 3) != j) { 7532 isFourElementShuffle = false; 7533 break; 7534 } 7535 7536 if (EltNo == 8) { 7537 EltNo = ByteSource/4; 7538 } else if (EltNo != ByteSource/4) { 7539 isFourElementShuffle = false; 7540 break; 7541 } 7542 } 7543 PFIndexes[i] = EltNo; 7544 } 7545 7546 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 7547 // perfect shuffle vector to determine if it is cost effective to do this as 7548 // discrete instructions, or whether we should use a vperm. 7549 // For now, we skip this for little endian until such time as we have a 7550 // little-endian perfect shuffle table. 7551 if (isFourElementShuffle && !isLittleEndian) { 7552 // Compute the index in the perfect shuffle table. 7553 unsigned PFTableIndex = 7554 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 7555 7556 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 7557 unsigned Cost = (PFEntry >> 30); 7558 7559 // Determining when to avoid vperm is tricky. Many things affect the cost 7560 // of vperm, particularly how many times the perm mask needs to be computed. 
7561 // For example, if the perm mask can be hoisted out of a loop or is already 7562 // used (perhaps because there are multiple permutes with the same shuffle 7563 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 7564 // the loop requires an extra register. 7565 // 7566 // As a compromise, we only emit discrete instructions if the shuffle can be 7567 // generated in 3 or fewer operations. When we have loop information 7568 // available, if this block is within a loop, we should avoid using vperm 7569 // for 3-operation perms and use a constant pool load instead. 7570 if (Cost < 3) 7571 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 7572 } 7573 7574 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 7575 // vector that will get spilled to the constant pool. 7576 if (V2.isUndef()) V2 = V1; 7577 7578 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 7579 // that it is in input element units, not in bytes. Convert now. 7580 7581 // For little endian, the order of the input vectors is reversed, and 7582 // the permutation mask is complemented with respect to 31. This is 7583 // necessary to produce proper semantics with the big-endian-biased vperm 7584 // instruction. 7585 EVT EltVT = V1.getValueType().getVectorElementType(); 7586 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 7587 7588 SmallVector<SDValue, 16> ResultMask; 7589 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 7590 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 7591 7592 for (unsigned j = 0; j != BytesPerElement; ++j) 7593 if (isLittleEndian) 7594 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), 7595 dl, MVT::i32)); 7596 else 7597 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, 7598 MVT::i32)); 7599 } 7600 7601 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); 7602 if (isLittleEndian) 7603 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7604 V2, V1, VPermMask); 7605 else 7606 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7607 V1, V2, VPermMask); 7608 } 7609 7610 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a 7611 /// vector comparison. If it is, return true and fill in Opc/isDot with 7612 /// information about the intrinsic. 7613 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, 7614 bool &isDot, const PPCSubtarget &Subtarget) { 7615 unsigned IntrinsicID = 7616 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 7617 CompareOpc = -1; 7618 isDot = false; 7619 switch (IntrinsicID) { 7620 default: return false; 7621 // Comparison predicates. 
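  // For each case, CompareOpc is the numeric opcode value forwarded to the
  // VCMP/VCMPo node for the corresponding vcmp* instruction, and isDot selects
  // the record (dot) form, whose result is read back from CR6.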
7622 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 7623 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 7624 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 7625 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 7626 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 7627 case Intrinsic::ppc_altivec_vcmpequd_p: 7628 if (Subtarget.hasP8Altivec()) { 7629 CompareOpc = 199; 7630 isDot = 1; 7631 } else 7632 return false; 7633 7634 break; 7635 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 7636 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 7637 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 7638 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 7639 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 7640 case Intrinsic::ppc_altivec_vcmpgtsd_p: 7641 if (Subtarget.hasP8Altivec()) { 7642 CompareOpc = 967; 7643 isDot = 1; 7644 } else 7645 return false; 7646 7647 break; 7648 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 7649 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 7650 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 7651 case Intrinsic::ppc_altivec_vcmpgtud_p: 7652 if (Subtarget.hasP8Altivec()) { 7653 CompareOpc = 711; 7654 isDot = 1; 7655 } else 7656 return false; 7657 7658 break; 7659 // VSX predicate comparisons use the same infrastructure 7660 case Intrinsic::ppc_vsx_xvcmpeqdp_p: 7661 case Intrinsic::ppc_vsx_xvcmpgedp_p: 7662 case Intrinsic::ppc_vsx_xvcmpgtdp_p: 7663 case Intrinsic::ppc_vsx_xvcmpeqsp_p: 7664 case Intrinsic::ppc_vsx_xvcmpgesp_p: 7665 case Intrinsic::ppc_vsx_xvcmpgtsp_p: 7666 if (Subtarget.hasVSX()) { 7667 switch (IntrinsicID) { 7668 case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; 7669 case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; 7670 case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; 7671 case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; 7672 case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; 7673 case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; 7674 } 7675 isDot = 1; 7676 } 7677 else 7678 return false; 7679 7680 break; 7681 7682 // Normal Comparisons. 
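  // The non-predicate forms only produce a vector result; isDot stays false,
  // so LowerINTRINSIC_WO_CHAIN returns a plain VCMP node without reading CR6.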
7683 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 7684 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 7685 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 7686 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 7687 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 7688 case Intrinsic::ppc_altivec_vcmpequd: 7689 if (Subtarget.hasP8Altivec()) { 7690 CompareOpc = 199; 7691 isDot = 0; 7692 } else 7693 return false; 7694 7695 break; 7696 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 7697 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 7698 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 7699 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 7700 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 7701 case Intrinsic::ppc_altivec_vcmpgtsd: 7702 if (Subtarget.hasP8Altivec()) { 7703 CompareOpc = 967; 7704 isDot = 0; 7705 } else 7706 return false; 7707 7708 break; 7709 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 7710 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 7711 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 7712 case Intrinsic::ppc_altivec_vcmpgtud: 7713 if (Subtarget.hasP8Altivec()) { 7714 CompareOpc = 711; 7715 isDot = 0; 7716 } else 7717 return false; 7718 7719 break; 7720 } 7721 return true; 7722 } 7723 7724 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 7725 /// lower, do it, otherwise return null. 7726 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 7727 SelectionDAG &DAG) const { 7728 unsigned IntrinsicID = 7729 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 7730 7731 if (IntrinsicID == Intrinsic::thread_pointer) { 7732 // Reads the thread pointer register, used for __builtin_thread_pointer. 7733 bool is64bit = Subtarget.isPPC64(); 7734 return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 7735 is64bit ? MVT::i64 : MVT::i32); 7736 } 7737 7738 // If this is a lowered altivec predicate compare, CompareOpc is set to the 7739 // opcode number of the comparison. 7740 SDLoc dl(Op); 7741 int CompareOpc; 7742 bool isDot; 7743 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) 7744 return SDValue(); // Don't custom lower most intrinsics. 7745 7746 // If this is a non-dot comparison, make the VCMP node and we are done. 7747 if (!isDot) { 7748 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 7749 Op.getOperand(1), Op.getOperand(2), 7750 DAG.getConstant(CompareOpc, dl, MVT::i32)); 7751 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 7752 } 7753 7754 // Create the PPCISD altivec 'dot' comparison node. 7755 SDValue Ops[] = { 7756 Op.getOperand(2), // LHS 7757 Op.getOperand(3), // RHS 7758 DAG.getConstant(CompareOpc, dl, MVT::i32) 7759 }; 7760 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 7761 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 7762 7763 // Now that we have the comparison, emit a copy from the CR to a GPR. 7764 // This is flagged to the above dot comparison. 7765 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 7766 DAG.getRegister(PPC::CR6, MVT::i32), 7767 CompNode.getValue(1)); 7768 7769 // Unpack the result based on how the target uses it. 7770 unsigned BitNo; // Bit # of CR6. 7771 bool InvertBit; // Invert result? 
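  // Operand 1 of the predicate (*_p) intrinsics selects how CR6 is
  // interpreted: which bit (EQ or LT) to extract and whether to invert it,
  // as enumerated in the cases below.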
7772 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
7773 default: // Can't happen, don't crash on invalid number though.
7774 case 0: // Return the value of the EQ bit of CR6.
7775 BitNo = 0; InvertBit = false;
7776 break;
7777 case 1: // Return the inverted value of the EQ bit of CR6.
7778 BitNo = 0; InvertBit = true;
7779 break;
7780 case 2: // Return the value of the LT bit of CR6.
7781 BitNo = 2; InvertBit = false;
7782 break;
7783 case 3: // Return the inverted value of the LT bit of CR6.
7784 BitNo = 2; InvertBit = true;
7785 break;
7786 }
7787
7788 // Shift the bit into the low position.
7789 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
7790 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
7791 // Isolate the bit.
7792 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
7793 DAG.getConstant(1, dl, MVT::i32));
7794
7795 // If we are supposed to, toggle the bit.
7796 if (InvertBit)
7797 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
7798 DAG.getConstant(1, dl, MVT::i32));
7799 return Flags;
7800 }
7801
7802 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
7803 SelectionDAG &DAG) const {
7804 SDLoc dl(Op);
7805 // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
7806 // instructions), but for smaller types, we need to first extend up to v2i32
7807 // before going any further.
7808 if (Op.getValueType() == MVT::v2i64) {
7809 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
7810 if (ExtVT != MVT::v2i32) {
7811 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
7812 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
7813 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
7814 ExtVT.getVectorElementType(), 4)));
7815 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
7816 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
7817 DAG.getValueType(MVT::v2i32));
7818 }
7819
7820 return Op;
7821 }
7822
7823 return SDValue();
7824 }
7825
7826 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
7827 SelectionDAG &DAG) const {
7828 SDLoc dl(Op);
7829 // Create a stack slot that is 16-byte aligned.
7830 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7831 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7832 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7833 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7834
7835 // Store the input value into Value#0 of the stack slot.
7836 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7837 MachinePointerInfo());
7838 // Load it out.
7839 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
7840 }
7841
7842 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
7843 SelectionDAG &DAG) const {
7844 SDLoc dl(Op);
7845 SDNode *N = Op.getNode();
7846
7847 assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
7848 "Unknown extract_vector_elt type");
7849
7850 SDValue Value = N->getOperand(0);
7851
7852 // The first part of this is like the store lowering except that we don't
7853 // need to track the chain.
7854
7855 // The values are now known to be -1 (false) or 1 (true). To convert this
7856 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7857 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7858 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7859
7860 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
7861 // understand how to form the extending load.
7862 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 7863 7864 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 7865 7866 // Now convert to an integer and store. 7867 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7868 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 7869 Value); 7870 7871 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 7872 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 7873 MachinePointerInfo PtrInfo = 7874 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7875 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7876 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7877 7878 SDValue StoreChain = DAG.getEntryNode(); 7879 SDValue Ops[] = {StoreChain, 7880 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 7881 Value, FIdx}; 7882 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 7883 7884 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 7885 dl, VTs, Ops, MVT::v4i32, PtrInfo); 7886 7887 // Extract the value requested. 7888 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 7889 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 7890 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 7891 7892 SDValue IntVal = 7893 DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); 7894 7895 if (!Subtarget.useCRBits()) 7896 return IntVal; 7897 7898 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); 7899 } 7900 7901 /// Lowering for QPX v4i1 loads 7902 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, 7903 SelectionDAG &DAG) const { 7904 SDLoc dl(Op); 7905 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); 7906 SDValue LoadChain = LN->getChain(); 7907 SDValue BasePtr = LN->getBasePtr(); 7908 7909 if (Op.getValueType() == MVT::v4f64 || 7910 Op.getValueType() == MVT::v4f32) { 7911 EVT MemVT = LN->getMemoryVT(); 7912 unsigned Alignment = LN->getAlignment(); 7913 7914 // If this load is properly aligned, then it is legal. 
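    // Otherwise, split it into four scalar loads (extending loads when the
    // in-memory scalar type is narrower than the value type) and reassemble
    // the vector with a BUILD_VECTOR below.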
7915 if (Alignment >= MemVT.getStoreSize()) 7916 return Op; 7917 7918 EVT ScalarVT = Op.getValueType().getScalarType(), 7919 ScalarMemVT = MemVT.getScalarType(); 7920 unsigned Stride = ScalarMemVT.getStoreSize(); 7921 7922 SDValue Vals[4], LoadChains[4]; 7923 for (unsigned Idx = 0; Idx < 4; ++Idx) { 7924 SDValue Load; 7925 if (ScalarVT != ScalarMemVT) 7926 Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, 7927 BasePtr, 7928 LN->getPointerInfo().getWithOffset(Idx * Stride), 7929 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 7930 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 7931 else 7932 Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, 7933 LN->getPointerInfo().getWithOffset(Idx * Stride), 7934 MinAlign(Alignment, Idx * Stride), 7935 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 7936 7937 if (Idx == 0 && LN->isIndexed()) { 7938 assert(LN->getAddressingMode() == ISD::PRE_INC && 7939 "Unknown addressing mode on vector load"); 7940 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), 7941 LN->getAddressingMode()); 7942 } 7943 7944 Vals[Idx] = Load; 7945 LoadChains[Idx] = Load.getValue(1); 7946 7947 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 7948 DAG.getConstant(Stride, dl, 7949 BasePtr.getValueType())); 7950 } 7951 7952 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 7953 SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals); 7954 7955 if (LN->isIndexed()) { 7956 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; 7957 return DAG.getMergeValues(RetOps, dl); 7958 } 7959 7960 SDValue RetOps[] = { Value, TF }; 7961 return DAG.getMergeValues(RetOps, dl); 7962 } 7963 7964 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); 7965 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); 7966 7967 // To lower v4i1 from a byte array, we load the byte elements of the 7968 // vector and then reuse the BUILD_VECTOR logic. 7969 7970 SDValue VectElmts[4], VectElmtChains[4]; 7971 for (unsigned i = 0; i < 4; ++i) { 7972 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 7973 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 7974 7975 VectElmts[i] = DAG.getExtLoad( 7976 ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx, 7977 LN->getPointerInfo().getWithOffset(i), MVT::i8, 7978 /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo()); 7979 VectElmtChains[i] = VectElmts[i].getValue(1); 7980 } 7981 7982 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); 7983 SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts); 7984 7985 SDValue RVals[] = { Value, LoadChain }; 7986 return DAG.getMergeValues(RVals, dl); 7987 } 7988 7989 /// Lowering for QPX v4i1 stores 7990 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, 7991 SelectionDAG &DAG) const { 7992 SDLoc dl(Op); 7993 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode()); 7994 SDValue StoreChain = SN->getChain(); 7995 SDValue BasePtr = SN->getBasePtr(); 7996 SDValue Value = SN->getValue(); 7997 7998 if (Value.getValueType() == MVT::v4f64 || 7999 Value.getValueType() == MVT::v4f32) { 8000 EVT MemVT = SN->getMemoryVT(); 8001 unsigned Alignment = SN->getAlignment(); 8002 8003 // If this store is properly aligned, then it is legal. 
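    // Otherwise, scalarize it: extract each of the four elements and emit four
    // narrower stores (truncating stores when the in-memory scalar type is
    // narrower), tied together with a TokenFactor below.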
8004 if (Alignment >= MemVT.getStoreSize()) 8005 return Op; 8006 8007 EVT ScalarVT = Value.getValueType().getScalarType(), 8008 ScalarMemVT = MemVT.getScalarType(); 8009 unsigned Stride = ScalarMemVT.getStoreSize(); 8010 8011 SDValue Stores[4]; 8012 for (unsigned Idx = 0; Idx < 4; ++Idx) { 8013 SDValue Ex = DAG.getNode( 8014 ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, 8015 DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); 8016 SDValue Store; 8017 if (ScalarVT != ScalarMemVT) 8018 Store = 8019 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, 8020 SN->getPointerInfo().getWithOffset(Idx * Stride), 8021 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 8022 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 8023 else 8024 Store = DAG.getStore(StoreChain, dl, Ex, BasePtr, 8025 SN->getPointerInfo().getWithOffset(Idx * Stride), 8026 MinAlign(Alignment, Idx * Stride), 8027 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 8028 8029 if (Idx == 0 && SN->isIndexed()) { 8030 assert(SN->getAddressingMode() == ISD::PRE_INC && 8031 "Unknown addressing mode on vector store"); 8032 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), 8033 SN->getAddressingMode()); 8034 } 8035 8036 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 8037 DAG.getConstant(Stride, dl, 8038 BasePtr.getValueType())); 8039 Stores[Idx] = Store; 8040 } 8041 8042 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8043 8044 if (SN->isIndexed()) { 8045 SDValue RetOps[] = { TF, Stores[0].getValue(1) }; 8046 return DAG.getMergeValues(RetOps, dl); 8047 } 8048 8049 return TF; 8050 } 8051 8052 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); 8053 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); 8054 8055 // The values are now known to be -1 (false) or 1 (true). To convert this 8056 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 8057 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 8058 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 8059 8060 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 8061 // understand how to form the extending load. 8062 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 8063 8064 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 8065 8066 // Now convert to an integer and store. 8067 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 8068 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 8069 Value); 8070 8071 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 8072 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 8073 MachinePointerInfo PtrInfo = 8074 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 8075 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8076 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8077 8078 SDValue Ops[] = {StoreChain, 8079 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 8080 Value, FIdx}; 8081 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 8082 8083 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 8084 dl, VTs, Ops, MVT::v4i32, PtrInfo); 8085 8086 // Move data into the byte array. 
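  // The converted words are now sitting in the 16-byte stack slot; load each
  // one back and truncate-store its low byte to the store's original address.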
8087 SDValue Loads[4], LoadChains[4]; 8088 for (unsigned i = 0; i < 4; ++i) { 8089 unsigned Offset = 4*i; 8090 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 8091 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 8092 8093 Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, 8094 PtrInfo.getWithOffset(Offset)); 8095 LoadChains[i] = Loads[i].getValue(1); 8096 } 8097 8098 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8099 8100 SDValue Stores[4]; 8101 for (unsigned i = 0; i < 4; ++i) { 8102 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 8103 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 8104 8105 Stores[i] = DAG.getTruncStore( 8106 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), 8107 MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), 8108 SN->getAAInfo()); 8109 } 8110 8111 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8112 8113 return StoreChain; 8114 } 8115 8116 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 8117 SDLoc dl(Op); 8118 if (Op.getValueType() == MVT::v4i32) { 8119 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8120 8121 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 8122 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 8123 8124 SDValue RHSSwap = // = vrlw RHS, 16 8125 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 8126 8127 // Shrinkify inputs to v8i16. 8128 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 8129 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 8130 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 8131 8132 // Low parts multiplied together, generating 32-bit results (we ignore the 8133 // top parts). 8134 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 8135 LHS, RHS, DAG, dl, MVT::v4i32); 8136 8137 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 8138 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 8139 // Shift the high parts up 16 bits. 8140 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 8141 Neg16, DAG, dl); 8142 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 8143 } else if (Op.getValueType() == MVT::v8i16) { 8144 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8145 8146 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 8147 8148 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 8149 LHS, RHS, Zero, DAG, dl); 8150 } else if (Op.getValueType() == MVT::v16i8) { 8151 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8152 bool isLittleEndian = Subtarget.isLittleEndian(); 8153 8154 // Multiply the even 8-bit parts, producing 16-bit sums. 8155 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 8156 LHS, RHS, DAG, dl, MVT::v8i16); 8157 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 8158 8159 // Multiply the odd 8-bit parts, producing 16-bit sums. 8160 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 8161 LHS, RHS, DAG, dl, MVT::v8i16); 8162 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 8163 8164 // Merge the results together. Because vmuleub and vmuloub are 8165 // instructions with a big-endian bias, we must reverse the 8166 // element numbering and reverse the meaning of "odd" and "even" 8167 // when generating little endian code. 
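    // For example, on a big-endian target the mask built below is
    // <1, 17, 3, 19, ...>, picking the low (least-significant) byte of each
    // 16-bit product and interleaving the even- and odd-position results.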
8168 int Ops[16]; 8169 for (unsigned i = 0; i != 8; ++i) { 8170 if (isLittleEndian) { 8171 Ops[i*2 ] = 2*i; 8172 Ops[i*2+1] = 2*i+16; 8173 } else { 8174 Ops[i*2 ] = 2*i+1; 8175 Ops[i*2+1] = 2*i+1+16; 8176 } 8177 } 8178 if (isLittleEndian) 8179 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); 8180 else 8181 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 8182 } else { 8183 llvm_unreachable("Unknown mul to lower!"); 8184 } 8185 } 8186 8187 /// LowerOperation - Provide custom lowering hooks for some operations. 8188 /// 8189 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 8190 switch (Op.getOpcode()) { 8191 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 8192 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 8193 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 8194 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 8195 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 8196 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 8197 case ISD::SETCC: return LowerSETCC(Op, DAG); 8198 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 8199 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 8200 case ISD::VASTART: 8201 return LowerVASTART(Op, DAG); 8202 8203 case ISD::VAARG: 8204 return LowerVAARG(Op, DAG); 8205 8206 case ISD::VACOPY: 8207 return LowerVACOPY(Op, DAG); 8208 8209 case ISD::STACKRESTORE: 8210 return LowerSTACKRESTORE(Op, DAG); 8211 8212 case ISD::DYNAMIC_STACKALLOC: 8213 return LowerDYNAMIC_STACKALLOC(Op, DAG); 8214 8215 case ISD::GET_DYNAMIC_AREA_OFFSET: 8216 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); 8217 8218 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 8219 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 8220 8221 case ISD::LOAD: return LowerLOAD(Op, DAG); 8222 case ISD::STORE: return LowerSTORE(Op, DAG); 8223 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); 8224 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 8225 case ISD::FP_TO_UINT: 8226 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 8227 SDLoc(Op)); 8228 case ISD::UINT_TO_FP: 8229 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 8230 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 8231 8232 // Lower 64-bit shifts. 8233 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 8234 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 8235 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 8236 8237 // Vector-related lowering. 8238 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 8239 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 8240 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 8241 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 8242 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 8243 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 8244 case ISD::MUL: return LowerMUL(Op, DAG); 8245 8246 // For counter-based loop handling. 8247 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 8248 8249 // Frame & Return address. 
8250 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 8251 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 8252 } 8253 } 8254 8255 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 8256 SmallVectorImpl<SDValue>&Results, 8257 SelectionDAG &DAG) const { 8258 SDLoc dl(N); 8259 switch (N->getOpcode()) { 8260 default: 8261 llvm_unreachable("Do not know how to custom type legalize this operation!"); 8262 case ISD::READCYCLECOUNTER: { 8263 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 8264 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); 8265 8266 Results.push_back(RTB); 8267 Results.push_back(RTB.getValue(1)); 8268 Results.push_back(RTB.getValue(2)); 8269 break; 8270 } 8271 case ISD::INTRINSIC_W_CHAIN: { 8272 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 8273 Intrinsic::ppc_is_decremented_ctr_nonzero) 8274 break; 8275 8276 assert(N->getValueType(0) == MVT::i1 && 8277 "Unexpected result type for CTR decrement intrinsic"); 8278 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), 8279 N->getValueType(0)); 8280 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 8281 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 8282 N->getOperand(1)); 8283 8284 Results.push_back(NewInt); 8285 Results.push_back(NewInt.getValue(1)); 8286 break; 8287 } 8288 case ISD::VAARG: { 8289 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) 8290 return; 8291 8292 EVT VT = N->getValueType(0); 8293 8294 if (VT == MVT::i64) { 8295 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); 8296 8297 Results.push_back(NewNode); 8298 Results.push_back(NewNode.getValue(1)); 8299 } 8300 return; 8301 } 8302 case ISD::FP_ROUND_INREG: { 8303 assert(N->getValueType(0) == MVT::ppcf128); 8304 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 8305 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8306 MVT::f64, N->getOperand(0), 8307 DAG.getIntPtrConstant(0, dl)); 8308 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8309 MVT::f64, N->getOperand(0), 8310 DAG.getIntPtrConstant(1, dl)); 8311 8312 // Add the two halves of the long double in round-to-zero mode. 8313 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 8314 8315 // We know the low half is about to be thrown away, so just use something 8316 // convenient. 8317 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 8318 FPreg, FPreg)); 8319 return; 8320 } 8321 case ISD::FP_TO_SINT: 8322 case ISD::FP_TO_UINT: 8323 // LowerFP_TO_INT() can only handle f32 and f64. 
8324 if (N->getOperand(0).getValueType() == MVT::ppcf128)
8325 return;
8326 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
8327 return;
8328 }
8329 }
8330
8331 //===----------------------------------------------------------------------===//
8332 // Other Lowering Code
8333 //===----------------------------------------------------------------------===//
8334
8335 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
8336 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
8337 Function *Func = Intrinsic::getDeclaration(M, Id);
8338 return Builder.CreateCall(Func, {});
8339 }
8340
8341 // The mappings for emitLeading/TrailingFence are taken from
8342 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
8343 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
8344 AtomicOrdering Ord, bool IsStore,
8345 bool IsLoad) const {
8346 if (Ord == AtomicOrdering::SequentiallyConsistent)
8347 return callIntrinsic(Builder, Intrinsic::ppc_sync);
8348 if (isReleaseOrStronger(Ord))
8349 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8350 return nullptr;
8351 }
8352
8353 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
8354 AtomicOrdering Ord, bool IsStore,
8355 bool IsLoad) const {
8356 if (IsLoad && isAcquireOrStronger(Ord))
8357 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8358 // FIXME: this is too conservative, a dependent branch + isync is enough.
8359 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
8360 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
8361 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
8362 return nullptr;
8363 }
8364
8365 MachineBasicBlock *
8366 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
8367 unsigned AtomicSize,
8368 unsigned BinOpcode) const {
8369 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
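  // The expansion is the usual load-reserve / store-conditional retry loop
  // (sketched in the block comments further down); here we only choose the
  // larx/stcx. mnemonics that match the access size.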
8370 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8371 8372 auto LoadMnemonic = PPC::LDARX; 8373 auto StoreMnemonic = PPC::STDCX; 8374 switch (AtomicSize) { 8375 default: 8376 llvm_unreachable("Unexpected size of atomic entity"); 8377 case 1: 8378 LoadMnemonic = PPC::LBARX; 8379 StoreMnemonic = PPC::STBCX; 8380 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 8381 break; 8382 case 2: 8383 LoadMnemonic = PPC::LHARX; 8384 StoreMnemonic = PPC::STHCX; 8385 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); 8386 break; 8387 case 4: 8388 LoadMnemonic = PPC::LWARX; 8389 StoreMnemonic = PPC::STWCX; 8390 break; 8391 case 8: 8392 LoadMnemonic = PPC::LDARX; 8393 StoreMnemonic = PPC::STDCX; 8394 break; 8395 } 8396 8397 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8398 MachineFunction *F = BB->getParent(); 8399 MachineFunction::iterator It = ++BB->getIterator(); 8400 8401 unsigned dest = MI.getOperand(0).getReg(); 8402 unsigned ptrA = MI.getOperand(1).getReg(); 8403 unsigned ptrB = MI.getOperand(2).getReg(); 8404 unsigned incr = MI.getOperand(3).getReg(); 8405 DebugLoc dl = MI.getDebugLoc(); 8406 8407 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 8408 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8409 F->insert(It, loopMBB); 8410 F->insert(It, exitMBB); 8411 exitMBB->splice(exitMBB->begin(), BB, 8412 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8413 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8414 8415 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8416 unsigned TmpReg = (!BinOpcode) ? incr : 8417 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass 8418 : &PPC::GPRCRegClass); 8419 8420 // thisMBB: 8421 // ... 8422 // fallthrough --> loopMBB 8423 BB->addSuccessor(loopMBB); 8424 8425 // loopMBB: 8426 // l[wd]arx dest, ptr 8427 // add r0, dest, incr 8428 // st[wd]cx. r0, ptr 8429 // bne- loopMBB 8430 // fallthrough --> exitMBB 8431 BB = loopMBB; 8432 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 8433 .addReg(ptrA).addReg(ptrB); 8434 if (BinOpcode) 8435 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 8436 BuildMI(BB, dl, TII->get(StoreMnemonic)) 8437 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 8438 BuildMI(BB, dl, TII->get(PPC::BCC)) 8439 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 8440 BB->addSuccessor(loopMBB); 8441 BB->addSuccessor(exitMBB); 8442 8443 // exitMBB: 8444 // ... 8445 BB = exitMBB; 8446 return BB; 8447 } 8448 8449 MachineBasicBlock * 8450 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, 8451 MachineBasicBlock *BB, 8452 bool is8bit, // operation 8453 unsigned BinOpcode) const { 8454 // If we support part-word atomic mnemonics, just use them 8455 if (Subtarget.hasPartwordAtomics()) 8456 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); 8457 8458 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 8459 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8460 // In 64 bit mode we have to use 64 bits for addresses, even though the 8461 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 8462 // registers without caring whether they're 32 or 64, but here we're 8463 // doing actual arithmetic on the addresses. 8464 bool is64bit = Subtarget.isPPC64(); 8465 unsigned ZeroReg = is64bit ? 
PPC::ZERO8 : PPC::ZERO; 8466 8467 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8468 MachineFunction *F = BB->getParent(); 8469 MachineFunction::iterator It = ++BB->getIterator(); 8470 8471 unsigned dest = MI.getOperand(0).getReg(); 8472 unsigned ptrA = MI.getOperand(1).getReg(); 8473 unsigned ptrB = MI.getOperand(2).getReg(); 8474 unsigned incr = MI.getOperand(3).getReg(); 8475 DebugLoc dl = MI.getDebugLoc(); 8476 8477 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 8478 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8479 F->insert(It, loopMBB); 8480 F->insert(It, exitMBB); 8481 exitMBB->splice(exitMBB->begin(), BB, 8482 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8483 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8484 8485 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8486 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 8487 : &PPC::GPRCRegClass; 8488 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 8489 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 8490 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 8491 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 8492 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 8493 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 8494 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 8495 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 8496 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 8497 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 8498 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 8499 unsigned Ptr1Reg; 8500 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 8501 8502 // thisMBB: 8503 // ... 8504 // fallthrough --> loopMBB 8505 BB->addSuccessor(loopMBB); 8506 8507 // The 4-byte load must be aligned, while a char or short may be 8508 // anywhere in the word. Hence all this nasty bookkeeping code. 8509 // add ptr1, ptrA, ptrB [copy if ptrA==0] 8510 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 8511 // xori shift, shift1, 24 [16] 8512 // rlwinm ptr, ptr1, 0, 0, 29 8513 // slw incr2, incr, shift 8514 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 8515 // slw mask, mask2, shift 8516 // loopMBB: 8517 // lwarx tmpDest, ptr 8518 // add tmp, tmpDest, incr2 8519 // andc tmp2, tmpDest, mask 8520 // and tmp3, tmp, mask 8521 // or tmp4, tmp3, tmp2 8522 // stwcx. tmp4, ptr 8523 // bne- loopMBB 8524 // fallthrough --> exitMBB 8525 // srw dest, tmpDest, shift 8526 if (ptrA != ZeroReg) { 8527 Ptr1Reg = RegInfo.createVirtualRegister(RC); 8528 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 8529 .addReg(ptrA).addReg(ptrB); 8530 } else { 8531 Ptr1Reg = ptrB; 8532 } 8533 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 8534 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 8535 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 8536 .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); 8537 if (is64bit) 8538 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 8539 .addReg(Ptr1Reg).addImm(0).addImm(61); 8540 else 8541 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 8542 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 8543 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 8544 .addReg(incr).addReg(ShiftReg); 8545 if (is8bit) 8546 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 8547 else { 8548 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 8549 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 8550 } 8551 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 8552 .addReg(Mask2Reg).addReg(ShiftReg); 8553 8554 BB = loopMBB; 8555 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 8556 .addReg(ZeroReg).addReg(PtrReg); 8557 if (BinOpcode) 8558 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 8559 .addReg(Incr2Reg).addReg(TmpDestReg); 8560 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 8561 .addReg(TmpDestReg).addReg(MaskReg); 8562 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 8563 .addReg(TmpReg).addReg(MaskReg); 8564 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 8565 .addReg(Tmp3Reg).addReg(Tmp2Reg); 8566 BuildMI(BB, dl, TII->get(PPC::STWCX)) 8567 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 8568 BuildMI(BB, dl, TII->get(PPC::BCC)) 8569 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 8570 BB->addSuccessor(loopMBB); 8571 BB->addSuccessor(exitMBB); 8572 8573 // exitMBB: 8574 // ... 8575 BB = exitMBB; 8576 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 8577 .addReg(ShiftReg); 8578 return BB; 8579 } 8580 8581 llvm::MachineBasicBlock * 8582 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, 8583 MachineBasicBlock *MBB) const { 8584 DebugLoc DL = MI.getDebugLoc(); 8585 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8586 8587 MachineFunction *MF = MBB->getParent(); 8588 MachineRegisterInfo &MRI = MF->getRegInfo(); 8589 8590 const BasicBlock *BB = MBB->getBasicBlock(); 8591 MachineFunction::iterator I = ++MBB->getIterator(); 8592 8593 // Memory Reference 8594 MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); 8595 MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); 8596 8597 unsigned DstReg = MI.getOperand(0).getReg(); 8598 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 8599 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 8600 unsigned mainDstReg = MRI.createVirtualRegister(RC); 8601 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 8602 8603 MVT PVT = getPointerTy(MF->getDataLayout()); 8604 assert((PVT == MVT::i64 || PVT == MVT::i32) && 8605 "Invalid Pointer Size!"); 8606 // For v = setjmp(buf), we generate 8607 // 8608 // thisMBB: 8609 // SjLjSetup mainMBB 8610 // bl mainMBB 8611 // v_restore = 1 8612 // b sinkMBB 8613 // 8614 // mainMBB: 8615 // buf[LabelOffset] = LR 8616 // v_main = 0 8617 // 8618 // sinkMBB: 8619 // v = phi(main, restore) 8620 // 8621 8622 MachineBasicBlock *thisMBB = MBB; 8623 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 8624 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 8625 MF->insert(I, mainMBB); 8626 MF->insert(I, sinkMBB); 8627 8628 MachineInstrBuilder MIB; 8629 8630 // Transfer the remainder of BB and its successor edges to sinkMBB. 
8631 sinkMBB->splice(sinkMBB->begin(), MBB, 8632 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 8633 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 8634 8635 // Note that the structure of the jmp_buf used here is not compatible 8636 // with that used by libc, and is not designed to be. Specifically, it 8637 // stores only those 'reserved' registers that LLVM does not otherwise 8638 // understand how to spill. Also, by convention, by the time this 8639 // intrinsic is called, Clang has already stored the frame address in the 8640 // first slot of the buffer and stack address in the third. Following the 8641 // X86 target code, we'll store the jump address in the second slot. We also 8642 // need to save the TOC pointer (R2) to handle jumps between shared 8643 // libraries, and that will be stored in the fourth slot. The thread 8644 // identifier (R13) is not affected. 8645 8646 // thisMBB: 8647 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 8648 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 8649 const int64_t BPOffset = 4 * PVT.getStoreSize(); 8650 8651 // Prepare IP either in reg. 8652 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 8653 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 8654 unsigned BufReg = MI.getOperand(1).getReg(); 8655 8656 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { 8657 setUsesTOCBasePtr(*MBB->getParent()); 8658 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 8659 .addReg(PPC::X2) 8660 .addImm(TOCOffset) 8661 .addReg(BufReg); 8662 MIB.setMemRefs(MMOBegin, MMOEnd); 8663 } 8664 8665 // Naked functions never have a base pointer, and so we use r1. For all 8666 // other functions, this decision must be delayed until during PEI. 8667 unsigned BaseReg; 8668 if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) 8669 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; 8670 else 8671 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; 8672 8673 MIB = BuildMI(*thisMBB, MI, DL, 8674 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) 8675 .addReg(BaseReg) 8676 .addImm(BPOffset) 8677 .addReg(BufReg); 8678 MIB.setMemRefs(MMOBegin, MMOEnd); 8679 8680 // Setup 8681 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 8682 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 8683 MIB.addRegMask(TRI->getNoPreservedMask()); 8684 8685 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 8686 8687 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 8688 .addMBB(mainMBB); 8689 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 8690 8691 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); 8692 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); 8693 8694 // mainMBB: 8695 // mainDstReg = 0 8696 MIB = 8697 BuildMI(mainMBB, DL, 8698 TII->get(Subtarget.isPPC64() ? 
PPC::MFLR8 : PPC::MFLR), LabelReg); 8699 8700 // Store IP 8701 if (Subtarget.isPPC64()) { 8702 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 8703 .addReg(LabelReg) 8704 .addImm(LabelOffset) 8705 .addReg(BufReg); 8706 } else { 8707 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 8708 .addReg(LabelReg) 8709 .addImm(LabelOffset) 8710 .addReg(BufReg); 8711 } 8712 8713 MIB.setMemRefs(MMOBegin, MMOEnd); 8714 8715 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 8716 mainMBB->addSuccessor(sinkMBB); 8717 8718 // sinkMBB: 8719 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 8720 TII->get(PPC::PHI), DstReg) 8721 .addReg(mainDstReg).addMBB(mainMBB) 8722 .addReg(restoreDstReg).addMBB(thisMBB); 8723 8724 MI.eraseFromParent(); 8725 return sinkMBB; 8726 } 8727 8728 MachineBasicBlock * 8729 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, 8730 MachineBasicBlock *MBB) const { 8731 DebugLoc DL = MI.getDebugLoc(); 8732 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8733 8734 MachineFunction *MF = MBB->getParent(); 8735 MachineRegisterInfo &MRI = MF->getRegInfo(); 8736 8737 // Memory Reference 8738 MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); 8739 MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); 8740 8741 MVT PVT = getPointerTy(MF->getDataLayout()); 8742 assert((PVT == MVT::i64 || PVT == MVT::i32) && 8743 "Invalid Pointer Size!"); 8744 8745 const TargetRegisterClass *RC = 8746 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 8747 unsigned Tmp = MRI.createVirtualRegister(RC); 8748 // Since FP is only updated here but NOT referenced, it's treated as GPR. 8749 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 8750 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 8751 unsigned BP = 8752 (PVT == MVT::i64) 8753 ? PPC::X30 8754 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 8755 : PPC::R30); 8756 8757 MachineInstrBuilder MIB; 8758 8759 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 8760 const int64_t SPOffset = 2 * PVT.getStoreSize(); 8761 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 8762 const int64_t BPOffset = 4 * PVT.getStoreSize(); 8763 8764 unsigned BufReg = MI.getOperand(0).getReg(); 8765 8766 // Reload FP (the jumped-to function may not have had a 8767 // frame pointer, and if so, then its r31 will be restored 8768 // as necessary). 
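  // (The slot layout read back here matches the one described in
  // emitEHSjLjSetJmp above: slot 0 holds the frame pointer, slot 1 the saved
  // IP, slot 2 the stack pointer, slot 3 the TOC pointer, and slot 4 the base
  // pointer, each PVT.getStoreSize() bytes wide.)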
8769 if (PVT == MVT::i64) { 8770 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 8771 .addImm(0) 8772 .addReg(BufReg); 8773 } else { 8774 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 8775 .addImm(0) 8776 .addReg(BufReg); 8777 } 8778 MIB.setMemRefs(MMOBegin, MMOEnd); 8779 8780 // Reload IP 8781 if (PVT == MVT::i64) { 8782 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 8783 .addImm(LabelOffset) 8784 .addReg(BufReg); 8785 } else { 8786 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 8787 .addImm(LabelOffset) 8788 .addReg(BufReg); 8789 } 8790 MIB.setMemRefs(MMOBegin, MMOEnd); 8791 8792 // Reload SP 8793 if (PVT == MVT::i64) { 8794 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 8795 .addImm(SPOffset) 8796 .addReg(BufReg); 8797 } else { 8798 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 8799 .addImm(SPOffset) 8800 .addReg(BufReg); 8801 } 8802 MIB.setMemRefs(MMOBegin, MMOEnd); 8803 8804 // Reload BP 8805 if (PVT == MVT::i64) { 8806 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 8807 .addImm(BPOffset) 8808 .addReg(BufReg); 8809 } else { 8810 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 8811 .addImm(BPOffset) 8812 .addReg(BufReg); 8813 } 8814 MIB.setMemRefs(MMOBegin, MMOEnd); 8815 8816 // Reload TOC 8817 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 8818 setUsesTOCBasePtr(*MBB->getParent()); 8819 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 8820 .addImm(TOCOffset) 8821 .addReg(BufReg); 8822 8823 MIB.setMemRefs(MMOBegin, MMOEnd); 8824 } 8825 8826 // Jump 8827 BuildMI(*MBB, MI, DL, 8828 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 8829 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 8830 8831 MI.eraseFromParent(); 8832 return MBB; 8833 } 8834 8835 MachineBasicBlock * 8836 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 8837 MachineBasicBlock *BB) const { 8838 if (MI.getOpcode() == TargetOpcode::STACKMAP || 8839 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 8840 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && 8841 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 8842 // Call lowering should have added an r2 operand to indicate a dependence 8843 // on the TOC base pointer value. It can't however, because there is no 8844 // way to mark the dependence as implicit there, and so the stackmap code 8845 // will confuse it with a regular operand. Instead, add the dependence 8846 // here. 8847 setUsesTOCBasePtr(*BB->getParent()); 8848 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); 8849 } 8850 8851 return emitPatchPoint(MI, BB); 8852 } 8853 8854 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || 8855 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { 8856 return emitEHSjLjSetJmp(MI, BB); 8857 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || 8858 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { 8859 return emitEHSjLjLongJmp(MI, BB); 8860 } 8861 8862 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8863 8864 // To "insert" these instructions we actually have to insert their 8865 // control-flow patterns. 
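  // For example, a SELECT_CC pseudo is expanded into a small diamond: the
  // current block ends with a conditional branch, a new copy0MBB supplies the
  // false value on the fallthrough path, and a new sinkMBB merges the two
  // values with a PHI.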
8866 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8867 MachineFunction::iterator It = ++BB->getIterator(); 8868 8869 MachineFunction *F = BB->getParent(); 8870 8871 if (Subtarget.hasISEL() && 8872 (MI.getOpcode() == PPC::SELECT_CC_I4 || 8873 MI.getOpcode() == PPC::SELECT_CC_I8 || 8874 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) { 8875 SmallVector<MachineOperand, 2> Cond; 8876 if (MI.getOpcode() == PPC::SELECT_CC_I4 || 8877 MI.getOpcode() == PPC::SELECT_CC_I8) 8878 Cond.push_back(MI.getOperand(4)); 8879 else 8880 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 8881 Cond.push_back(MI.getOperand(1)); 8882 8883 DebugLoc dl = MI.getDebugLoc(); 8884 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, 8885 MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); 8886 } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || 8887 MI.getOpcode() == PPC::SELECT_CC_I8 || 8888 MI.getOpcode() == PPC::SELECT_CC_F4 || 8889 MI.getOpcode() == PPC::SELECT_CC_F8 || 8890 MI.getOpcode() == PPC::SELECT_CC_QFRC || 8891 MI.getOpcode() == PPC::SELECT_CC_QSRC || 8892 MI.getOpcode() == PPC::SELECT_CC_QBRC || 8893 MI.getOpcode() == PPC::SELECT_CC_VRRC || 8894 MI.getOpcode() == PPC::SELECT_CC_VSFRC || 8895 MI.getOpcode() == PPC::SELECT_CC_VSSRC || 8896 MI.getOpcode() == PPC::SELECT_CC_VSRC || 8897 MI.getOpcode() == PPC::SELECT_I4 || 8898 MI.getOpcode() == PPC::SELECT_I8 || 8899 MI.getOpcode() == PPC::SELECT_F4 || 8900 MI.getOpcode() == PPC::SELECT_F8 || 8901 MI.getOpcode() == PPC::SELECT_QFRC || 8902 MI.getOpcode() == PPC::SELECT_QSRC || 8903 MI.getOpcode() == PPC::SELECT_QBRC || 8904 MI.getOpcode() == PPC::SELECT_VRRC || 8905 MI.getOpcode() == PPC::SELECT_VSFRC || 8906 MI.getOpcode() == PPC::SELECT_VSSRC || 8907 MI.getOpcode() == PPC::SELECT_VSRC) { 8908 // The incoming instruction knows the destination vreg to set, the 8909 // condition code register to branch on, the true/false values to 8910 // select between, and a branch opcode to use. 8911 8912 // thisMBB: 8913 // ... 8914 // TrueVal = ... 8915 // cmpTY ccX, r1, r2 8916 // bCC copy1MBB 8917 // fallthrough --> copy0MBB 8918 MachineBasicBlock *thisMBB = BB; 8919 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 8920 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 8921 DebugLoc dl = MI.getDebugLoc(); 8922 F->insert(It, copy0MBB); 8923 F->insert(It, sinkMBB); 8924 8925 // Transfer the remainder of BB and its successor edges to sinkMBB. 8926 sinkMBB->splice(sinkMBB->begin(), BB, 8927 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8928 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 8929 8930 // Next, add the true and fallthrough blocks as its successors. 
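    // (The conditional branch emitted below jumps straight to sinkMBB when
    // the condition is true, so the true value reaches the final PHI from
    // thisMBB; copy0MBB is the fallthrough path that supplies the false
    // value.)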
8931 BB->addSuccessor(copy0MBB); 8932 BB->addSuccessor(sinkMBB); 8933 8934 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || 8935 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || 8936 MI.getOpcode() == PPC::SELECT_QFRC || 8937 MI.getOpcode() == PPC::SELECT_QSRC || 8938 MI.getOpcode() == PPC::SELECT_QBRC || 8939 MI.getOpcode() == PPC::SELECT_VRRC || 8940 MI.getOpcode() == PPC::SELECT_VSFRC || 8941 MI.getOpcode() == PPC::SELECT_VSSRC || 8942 MI.getOpcode() == PPC::SELECT_VSRC) { 8943 BuildMI(BB, dl, TII->get(PPC::BC)) 8944 .addReg(MI.getOperand(1).getReg()) 8945 .addMBB(sinkMBB); 8946 } else { 8947 unsigned SelectPred = MI.getOperand(4).getImm(); 8948 BuildMI(BB, dl, TII->get(PPC::BCC)) 8949 .addImm(SelectPred) 8950 .addReg(MI.getOperand(1).getReg()) 8951 .addMBB(sinkMBB); 8952 } 8953 8954 // copy0MBB: 8955 // %FalseValue = ... 8956 // # fallthrough to sinkMBB 8957 BB = copy0MBB; 8958 8959 // Update machine-CFG edges 8960 BB->addSuccessor(sinkMBB); 8961 8962 // sinkMBB: 8963 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 8964 // ... 8965 BB = sinkMBB; 8966 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) 8967 .addReg(MI.getOperand(3).getReg()) 8968 .addMBB(copy0MBB) 8969 .addReg(MI.getOperand(2).getReg()) 8970 .addMBB(thisMBB); 8971 } else if (MI.getOpcode() == PPC::ReadTB) { 8972 // To read the 64-bit time-base register on a 32-bit target, we read the 8973 // two halves. Should the counter have wrapped while it was being read, we 8974 // need to try again. 8975 // ... 8976 // readLoop: 8977 // mfspr Rx,TBU # load from TBU 8978 // mfspr Ry,TB # load from TB 8979 // mfspr Rz,TBU # load from TBU 8980 // cmpw crX,Rx,Rz # check if 'old'='new' 8981 // bne readLoop # branch if they're not equal 8982 // ... 8983 8984 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); 8985 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 8986 DebugLoc dl = MI.getDebugLoc(); 8987 F->insert(It, readMBB); 8988 F->insert(It, sinkMBB); 8989 8990 // Transfer the remainder of BB and its successor edges to sinkMBB. 
8991 sinkMBB->splice(sinkMBB->begin(), BB, 8992 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8993 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 8994 8995 BB->addSuccessor(readMBB); 8996 BB = readMBB; 8997 8998 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8999 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 9000 unsigned LoReg = MI.getOperand(0).getReg(); 9001 unsigned HiReg = MI.getOperand(1).getReg(); 9002 9003 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); 9004 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); 9005 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); 9006 9007 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 9008 9009 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) 9010 .addReg(HiReg).addReg(ReadAgainReg); 9011 BuildMI(BB, dl, TII->get(PPC::BCC)) 9012 .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); 9013 9014 BB->addSuccessor(readMBB); 9015 BB->addSuccessor(sinkMBB); 9016 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 9017 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 9018 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 9019 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 9020 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 9021 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); 9022 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 9023 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); 9024 9025 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 9026 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 9027 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 9028 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 9029 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 9030 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); 9031 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 9032 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); 9033 9034 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 9035 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 9036 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 9037 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 9038 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 9039 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); 9040 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 9041 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); 9042 9043 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 9044 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 9045 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 9046 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 9047 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 9048 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); 9049 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 9050 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); 9051 9052 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 9053 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 9054 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 9055 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 9056 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 9057 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); 9058 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 9059 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); 9060 9061 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 9062 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 9063 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 9064 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 9065 else if (MI.getOpcode() == 
PPC::ATOMIC_LOAD_SUB_I32) 9066 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); 9067 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 9068 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); 9069 9070 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) 9071 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 9072 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) 9073 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 9074 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) 9075 BB = EmitAtomicBinary(MI, BB, 4, 0); 9076 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) 9077 BB = EmitAtomicBinary(MI, BB, 8, 0); 9078 9079 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 9080 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || 9081 (Subtarget.hasPartwordAtomics() && 9082 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || 9083 (Subtarget.hasPartwordAtomics() && 9084 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { 9085 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 9086 9087 auto LoadMnemonic = PPC::LDARX; 9088 auto StoreMnemonic = PPC::STDCX; 9089 switch (MI.getOpcode()) { 9090 default: 9091 llvm_unreachable("Compare and swap of unknown size"); 9092 case PPC::ATOMIC_CMP_SWAP_I8: 9093 LoadMnemonic = PPC::LBARX; 9094 StoreMnemonic = PPC::STBCX; 9095 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 9096 break; 9097 case PPC::ATOMIC_CMP_SWAP_I16: 9098 LoadMnemonic = PPC::LHARX; 9099 StoreMnemonic = PPC::STHCX; 9100 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 9101 break; 9102 case PPC::ATOMIC_CMP_SWAP_I32: 9103 LoadMnemonic = PPC::LWARX; 9104 StoreMnemonic = PPC::STWCX; 9105 break; 9106 case PPC::ATOMIC_CMP_SWAP_I64: 9107 LoadMnemonic = PPC::LDARX; 9108 StoreMnemonic = PPC::STDCX; 9109 break; 9110 } 9111 unsigned dest = MI.getOperand(0).getReg(); 9112 unsigned ptrA = MI.getOperand(1).getReg(); 9113 unsigned ptrB = MI.getOperand(2).getReg(); 9114 unsigned oldval = MI.getOperand(3).getReg(); 9115 unsigned newval = MI.getOperand(4).getReg(); 9116 DebugLoc dl = MI.getDebugLoc(); 9117 9118 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 9119 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 9120 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 9121 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 9122 F->insert(It, loop1MBB); 9123 F->insert(It, loop2MBB); 9124 F->insert(It, midMBB); 9125 F->insert(It, exitMBB); 9126 exitMBB->splice(exitMBB->begin(), BB, 9127 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9128 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 9129 9130 // thisMBB: 9131 // ... 9132 // fallthrough --> loopMBB 9133 BB->addSuccessor(loop1MBB); 9134 9135 // loop1MBB: 9136 // l[bhwd]arx dest, ptr 9137 // cmp[wd] dest, oldval 9138 // bne- midMBB 9139 // loop2MBB: 9140 // st[bhwd]cx. newval, ptr 9141 // bne- loopMBB 9142 // b exitBB 9143 // midMBB: 9144 // st[bhwd]cx. dest, ptr 9145 // exitBB: 9146 BB = loop1MBB; 9147 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 9148 .addReg(ptrA).addReg(ptrB); 9149 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::CMPD : PPC::CMPW), PPC::CR0) 9150 .addReg(oldval).addReg(dest); 9151 BuildMI(BB, dl, TII->get(PPC::BCC)) 9152 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 9153 BB->addSuccessor(loop2MBB); 9154 BB->addSuccessor(midMBB); 9155 9156 BB = loop2MBB; 9157 BuildMI(BB, dl, TII->get(StoreMnemonic)) 9158 .addReg(newval).addReg(ptrA).addReg(ptrB); 9159 BuildMI(BB, dl, TII->get(PPC::BCC)) 9160 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 9161 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 9162 BB->addSuccessor(loop1MBB); 9163 BB->addSuccessor(exitMBB); 9164 9165 BB = midMBB; 9166 BuildMI(BB, dl, TII->get(StoreMnemonic)) 9167 .addReg(dest).addReg(ptrA).addReg(ptrB); 9168 BB->addSuccessor(exitMBB); 9169 9170 // exitMBB: 9171 // ... 9172 BB = exitMBB; 9173 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 9174 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 9175 // We must use 64-bit registers for addresses when targeting 64-bit, 9176 // since we're actually doing arithmetic on them. Other registers 9177 // can be 32-bit. 9178 bool is64bit = Subtarget.isPPC64(); 9179 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 9180 9181 unsigned dest = MI.getOperand(0).getReg(); 9182 unsigned ptrA = MI.getOperand(1).getReg(); 9183 unsigned ptrB = MI.getOperand(2).getReg(); 9184 unsigned oldval = MI.getOperand(3).getReg(); 9185 unsigned newval = MI.getOperand(4).getReg(); 9186 DebugLoc dl = MI.getDebugLoc(); 9187 9188 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 9189 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 9190 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 9191 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 9192 F->insert(It, loop1MBB); 9193 F->insert(It, loop2MBB); 9194 F->insert(It, midMBB); 9195 F->insert(It, exitMBB); 9196 exitMBB->splice(exitMBB->begin(), BB, 9197 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9198 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 9199 9200 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9201 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 9202 : &PPC::GPRCRegClass; 9203 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 9204 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 9205 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 9206 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 9207 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 9208 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 9209 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 9210 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 9211 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 9212 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 9213 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 9214 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 9215 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 9216 unsigned Ptr1Reg; 9217 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 9218 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 9219 // thisMBB: 9220 // ... 9221 // fallthrough --> loopMBB 9222 BB->addSuccessor(loop1MBB); 9223 9224 // The 4-byte load must be aligned, while a char or short may be 9225 // anywhere in the word. Hence all this nasty bookkeeping code. 
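    // In effect, PtrReg below is ptr1 rounded down to a word boundary,
    // ShiftReg is the number of bits the narrow operand must be shifted left
    // to land in its byte/halfword lane within that word, and MaskReg selects
    // just that lane. The full sequence is roughly: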
9226 // add ptr1, ptrA, ptrB [copy if ptrA==0] 9227 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 9228 // xori shift, shift1, 24 [16] 9229 // rlwinm ptr, ptr1, 0, 0, 29 9230 // slw newval2, newval, shift 9231 // slw oldval2, oldval,shift 9232 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 9233 // slw mask, mask2, shift 9234 // and newval3, newval2, mask 9235 // and oldval3, oldval2, mask 9236 // loop1MBB: 9237 // lwarx tmpDest, ptr 9238 // and tmp, tmpDest, mask 9239 // cmpw tmp, oldval3 9240 // bne- midMBB 9241 // loop2MBB: 9242 // andc tmp2, tmpDest, mask 9243 // or tmp4, tmp2, newval3 9244 // stwcx. tmp4, ptr 9245 // bne- loop1MBB 9246 // b exitBB 9247 // midMBB: 9248 // stwcx. tmpDest, ptr 9249 // exitBB: 9250 // srw dest, tmpDest, shift 9251 if (ptrA != ZeroReg) { 9252 Ptr1Reg = RegInfo.createVirtualRegister(RC); 9253 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 9254 .addReg(ptrA).addReg(ptrB); 9255 } else { 9256 Ptr1Reg = ptrB; 9257 } 9258 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 9259 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 9260 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 9261 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 9262 if (is64bit) 9263 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 9264 .addReg(Ptr1Reg).addImm(0).addImm(61); 9265 else 9266 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 9267 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 9268 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 9269 .addReg(newval).addReg(ShiftReg); 9270 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 9271 .addReg(oldval).addReg(ShiftReg); 9272 if (is8bit) 9273 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 9274 else { 9275 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 9276 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 9277 .addReg(Mask3Reg).addImm(65535); 9278 } 9279 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 9280 .addReg(Mask2Reg).addReg(ShiftReg); 9281 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 9282 .addReg(NewVal2Reg).addReg(MaskReg); 9283 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 9284 .addReg(OldVal2Reg).addReg(MaskReg); 9285 9286 BB = loop1MBB; 9287 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 9288 .addReg(ZeroReg).addReg(PtrReg); 9289 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 9290 .addReg(TmpDestReg).addReg(MaskReg); 9291 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 9292 .addReg(TmpReg).addReg(OldVal3Reg); 9293 BuildMI(BB, dl, TII->get(PPC::BCC)) 9294 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 9295 BB->addSuccessor(loop2MBB); 9296 BB->addSuccessor(midMBB); 9297 9298 BB = loop2MBB; 9299 BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) 9300 .addReg(TmpDestReg).addReg(MaskReg); 9301 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 9302 .addReg(Tmp2Reg).addReg(NewVal3Reg); 9303 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 9304 .addReg(ZeroReg).addReg(PtrReg); 9305 BuildMI(BB, dl, TII->get(PPC::BCC)) 9306 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 9307 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 9308 BB->addSuccessor(loop1MBB); 9309 BB->addSuccessor(exitMBB); 9310 9311 BB = midMBB; 9312 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 9313 .addReg(ZeroReg).addReg(PtrReg); 9314 BB->addSuccessor(exitMBB); 9315 9316 // exitMBB: 9317 // ... 
9318 BB = exitMBB; 9319 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 9320 .addReg(ShiftReg); 9321 } else if (MI.getOpcode() == PPC::FADDrtz) { 9322 // This pseudo performs an FADD with rounding mode temporarily forced 9323 // to round-to-zero. We emit this via custom inserter since the FPSCR 9324 // is not modeled at the SelectionDAG level. 9325 unsigned Dest = MI.getOperand(0).getReg(); 9326 unsigned Src1 = MI.getOperand(1).getReg(); 9327 unsigned Src2 = MI.getOperand(2).getReg(); 9328 DebugLoc dl = MI.getDebugLoc(); 9329 9330 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9331 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 9332 9333 // Save FPSCR value. 9334 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 9335 9336 // Set rounding mode to round-to-zero. 9337 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 9338 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 9339 9340 // Perform addition. 9341 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 9342 9343 // Restore FPSCR value. 9344 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); 9345 } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || 9346 MI.getOpcode() == PPC::ANDIo_1_GT_BIT || 9347 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 9348 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { 9349 unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 9350 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) 9351 ? PPC::ANDIo8 9352 : PPC::ANDIo; 9353 bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || 9354 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); 9355 9356 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9357 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 9358 &PPC::GPRCRegClass : 9359 &PPC::G8RCRegClass); 9360 9361 DebugLoc dl = MI.getDebugLoc(); 9362 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 9363 .addReg(MI.getOperand(1).getReg()) 9364 .addImm(1); 9365 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 9366 MI.getOperand(0).getReg()) 9367 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 9368 } else if (MI.getOpcode() == PPC::TCHECK_RET) { 9369 DebugLoc Dl = MI.getDebugLoc(); 9370 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9371 unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 9372 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); 9373 return BB; 9374 } else { 9375 llvm_unreachable("Unexpected instr type to insert"); 9376 } 9377 9378 MI.eraseFromParent(); // The pseudo instruction is gone now. 
9379 return BB; 9380 } 9381 9382 //===----------------------------------------------------------------------===// 9383 // Target Optimization Hooks 9384 //===----------------------------------------------------------------------===// 9385 9386 static std::string getRecipOp(const char *Base, EVT VT) { 9387 std::string RecipOp(Base); 9388 if (VT.getScalarType() == MVT::f64) 9389 RecipOp += "d"; 9390 else 9391 RecipOp += "f"; 9392 9393 if (VT.isVector()) 9394 RecipOp = "vec-" + RecipOp; 9395 9396 return RecipOp; 9397 } 9398 9399 SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, 9400 DAGCombinerInfo &DCI, 9401 unsigned &RefinementSteps, 9402 bool &UseOneConstNR) const { 9403 EVT VT = Operand.getValueType(); 9404 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 9405 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 9406 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 9407 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 9408 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 9409 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 9410 TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; 9411 std::string RecipOp = getRecipOp("sqrt", VT); 9412 if (!Recips.isEnabled(RecipOp)) 9413 return SDValue(); 9414 9415 RefinementSteps = Recips.getRefinementSteps(RecipOp); 9416 UseOneConstNR = true; 9417 return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); 9418 } 9419 return SDValue(); 9420 } 9421 9422 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, 9423 DAGCombinerInfo &DCI, 9424 unsigned &RefinementSteps) const { 9425 EVT VT = Operand.getValueType(); 9426 if ((VT == MVT::f32 && Subtarget.hasFRES()) || 9427 (VT == MVT::f64 && Subtarget.hasFRE()) || 9428 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 9429 (VT == MVT::v2f64 && Subtarget.hasVSX()) || 9430 (VT == MVT::v4f32 && Subtarget.hasQPX()) || 9431 (VT == MVT::v4f64 && Subtarget.hasQPX())) { 9432 TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; 9433 std::string RecipOp = getRecipOp("div", VT); 9434 if (!Recips.isEnabled(RecipOp)) 9435 return SDValue(); 9436 9437 RefinementSteps = Recips.getRefinementSteps(RecipOp); 9438 return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); 9439 } 9440 return SDValue(); 9441 } 9442 9443 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { 9444 // Note: This functionality is used only when unsafe-fp-math is enabled, and 9445 // on cores with reciprocal estimates (which are used when unsafe-fp-math is 9446 // enabled for division), this functionality is redundant with the default 9447 // combiner logic (once the division -> reciprocal/multiply transformation 9448 // has taken place). As a result, this matters more for older cores than for 9449 // newer ones. 9450 9451 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 9452 // reciprocal if there are two or more FDIVs (for embedded cores with only 9453 // one FP pipeline) for three or more FDIVs (for generic OOO cores). 9454 switch (Subtarget.getDarwinDirective()) { 9455 default: 9456 return 3; 9457 case PPC::DIR_440: 9458 case PPC::DIR_A2: 9459 case PPC::DIR_E500mc: 9460 case PPC::DIR_E5500: 9461 return 2; 9462 } 9463 } 9464 9465 // isConsecutiveLSLoc needs to work even if all adds have not yet been 9466 // collapsed, and so we need to look through chains of them. 
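// For example, a location expressed as (add (add X, 48), 16) may not have
// been folded into a single (add X, 64) yet; getBaseWithConstantOffset walks
// the nested adds and accumulates the total offset so that the comparison in
// isConsecutiveLSLoc still sees base X with offset 64.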
9467 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, 9468 int64_t& Offset, SelectionDAG &DAG) { 9469 if (DAG.isBaseWithConstantOffset(Loc)) { 9470 Base = Loc.getOperand(0); 9471 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); 9472 9473 // The base might itself be a base plus an offset, and if so, accumulate 9474 // that as well. 9475 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); 9476 } 9477 } 9478 9479 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 9480 unsigned Bytes, int Dist, 9481 SelectionDAG &DAG) { 9482 if (VT.getSizeInBits() / 8 != Bytes) 9483 return false; 9484 9485 SDValue BaseLoc = Base->getBasePtr(); 9486 if (Loc.getOpcode() == ISD::FrameIndex) { 9487 if (BaseLoc.getOpcode() != ISD::FrameIndex) 9488 return false; 9489 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 9490 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 9491 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 9492 int FS = MFI->getObjectSize(FI); 9493 int BFS = MFI->getObjectSize(BFI); 9494 if (FS != BFS || FS != (int)Bytes) return false; 9495 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 9496 } 9497 9498 SDValue Base1 = Loc, Base2 = BaseLoc; 9499 int64_t Offset1 = 0, Offset2 = 0; 9500 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); 9501 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); 9502 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) 9503 return true; 9504 9505 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9506 const GlobalValue *GV1 = nullptr; 9507 const GlobalValue *GV2 = nullptr; 9508 Offset1 = 0; 9509 Offset2 = 0; 9510 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 9511 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 9512 if (isGA1 && isGA2 && GV1 == GV2) 9513 return Offset1 == (Offset2 + Dist*Bytes); 9514 return false; 9515 } 9516 9517 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 9518 // not enforce equality of the chain operands. 
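// It also recognizes the target's intrinsic loads and stores (Altivec, VSX
// and QPX) by mapping each intrinsic ID to the memory type it accesses, so
// that those can take part in the consecutive-access check as well.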
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}

// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done; otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look
  // through all loads (just the chain uses) and token factors to find a
  // consecutive load.
  Visited.clear();
  Queue.clear();

  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
       IE = LoadRoots.end(); I != IE; ++I) {
    Queue.push_back(*I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
           UE = LoadRoot->use_end(); UI != UE; ++UI)
        if (((isa<MemSDNode>(*UI) &&
              cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
          Queue.push_back(*UI);
    }
  }

  return false;
}

SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ?
2 : 4))->get(); 9717 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 9718 9719 if (ISD::isSignedIntSetCC(CC)) { 9720 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 9721 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 9722 return SDValue(); 9723 } else if (ISD::isUnsignedIntSetCC(CC)) { 9724 if (!DAG.MaskedValueIsZero(N->getOperand(0), 9725 APInt::getHighBitsSet(OpBits, OpBits-1)) || 9726 !DAG.MaskedValueIsZero(N->getOperand(1), 9727 APInt::getHighBitsSet(OpBits, OpBits-1))) 9728 return SDValue(); 9729 } else { 9730 // This is neither a signed nor an unsigned comparison, just make sure 9731 // that the high bits are equal. 9732 APInt Op1Zero, Op1One; 9733 APInt Op2Zero, Op2One; 9734 DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); 9735 DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); 9736 9737 // We don't really care about what is known about the first bit (if 9738 // anything), so clear it in all masks prior to comparing them. 9739 Op1Zero.clearBit(0); Op1One.clearBit(0); 9740 Op2Zero.clearBit(0); Op2One.clearBit(0); 9741 9742 if (Op1Zero != Op2Zero || Op1One != Op2One) 9743 return SDValue(); 9744 } 9745 } 9746 9747 // We now know that the higher-order bits are irrelevant, we just need to 9748 // make sure that all of the intermediate operations are bit operations, and 9749 // all inputs are extensions. 9750 if (N->getOperand(0).getOpcode() != ISD::AND && 9751 N->getOperand(0).getOpcode() != ISD::OR && 9752 N->getOperand(0).getOpcode() != ISD::XOR && 9753 N->getOperand(0).getOpcode() != ISD::SELECT && 9754 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 9755 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 9756 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 9757 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 9758 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 9759 return SDValue(); 9760 9761 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 9762 N->getOperand(1).getOpcode() != ISD::AND && 9763 N->getOperand(1).getOpcode() != ISD::OR && 9764 N->getOperand(1).getOpcode() != ISD::XOR && 9765 N->getOperand(1).getOpcode() != ISD::SELECT && 9766 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 9767 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 9768 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 9769 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 9770 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 9771 return SDValue(); 9772 9773 SmallVector<SDValue, 4> Inputs; 9774 SmallVector<SDValue, 8> BinOps, PromOps; 9775 SmallPtrSet<SDNode *, 16> Visited; 9776 9777 for (unsigned i = 0; i < 2; ++i) { 9778 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 9779 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 9780 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 9781 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 9782 isa<ConstantSDNode>(N->getOperand(i))) 9783 Inputs.push_back(N->getOperand(i)); 9784 else 9785 BinOps.push_back(N->getOperand(i)); 9786 9787 if (N->getOpcode() == ISD::TRUNCATE) 9788 break; 9789 } 9790 9791 // Visit all inputs, collect all binary operations (and, or, xor and 9792 // select) that are all fed by extensions. 9793 while (!BinOps.empty()) { 9794 SDValue BinOp = BinOps.back(); 9795 BinOps.pop_back(); 9796 9797 if (!Visited.insert(BinOp.getNode()).second) 9798 continue; 9799 9800 PromOps.push_back(BinOp); 9801 9802 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 9803 // The condition of the select is not promoted. 
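      // (For SELECT that is operand 0; for SELECT_CC only operands 2 and 3
      // are the selected values, so the comparison operands and condition
      // code are skipped as well.)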
9804 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 9805 continue; 9806 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 9807 continue; 9808 9809 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 9810 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 9811 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 9812 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 9813 isa<ConstantSDNode>(BinOp.getOperand(i))) { 9814 Inputs.push_back(BinOp.getOperand(i)); 9815 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 9816 BinOp.getOperand(i).getOpcode() == ISD::OR || 9817 BinOp.getOperand(i).getOpcode() == ISD::XOR || 9818 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 9819 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 9820 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 9821 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 9822 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 9823 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 9824 BinOps.push_back(BinOp.getOperand(i)); 9825 } else { 9826 // We have an input that is not an extension or another binary 9827 // operation; we'll abort this transformation. 9828 return SDValue(); 9829 } 9830 } 9831 } 9832 9833 // Make sure that this is a self-contained cluster of operations (which 9834 // is not quite the same thing as saying that everything has only one 9835 // use). 9836 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 9837 if (isa<ConstantSDNode>(Inputs[i])) 9838 continue; 9839 9840 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 9841 UE = Inputs[i].getNode()->use_end(); 9842 UI != UE; ++UI) { 9843 SDNode *User = *UI; 9844 if (User != N && !Visited.count(User)) 9845 return SDValue(); 9846 9847 // Make sure that we're not going to promote the non-output-value 9848 // operand(s) or SELECT or SELECT_CC. 9849 // FIXME: Although we could sometimes handle this, and it does occur in 9850 // practice that one of the condition inputs to the select is also one of 9851 // the outputs, we currently can't deal with this. 9852 if (User->getOpcode() == ISD::SELECT) { 9853 if (User->getOperand(0) == Inputs[i]) 9854 return SDValue(); 9855 } else if (User->getOpcode() == ISD::SELECT_CC) { 9856 if (User->getOperand(0) == Inputs[i] || 9857 User->getOperand(1) == Inputs[i]) 9858 return SDValue(); 9859 } 9860 } 9861 } 9862 9863 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 9864 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 9865 UE = PromOps[i].getNode()->use_end(); 9866 UI != UE; ++UI) { 9867 SDNode *User = *UI; 9868 if (User != N && !Visited.count(User)) 9869 return SDValue(); 9870 9871 // Make sure that we're not going to promote the non-output-value 9872 // operand(s) or SELECT or SELECT_CC. 9873 // FIXME: Although we could sometimes handle this, and it does occur in 9874 // practice that one of the condition inputs to the select is also one of 9875 // the outputs, we currently can't deal with this. 9876 if (User->getOpcode() == ISD::SELECT) { 9877 if (User->getOperand(0) == PromOps[i]) 9878 return SDValue(); 9879 } else if (User->getOpcode() == ISD::SELECT_CC) { 9880 if (User->getOperand(0) == PromOps[i] || 9881 User->getOperand(1) == PromOps[i]) 9882 return SDValue(); 9883 } 9884 } 9885 } 9886 9887 // Replace all inputs with the extension operand. 
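  // For example, an input (zext i1 %c to i32) is replaced everywhere by %c
  // itself; the logical operations that consumed it are rebuilt below with an
  // i1 result type, so the intermediate extensions simply disappear.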
9888 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 9889 // Constants may have users outside the cluster of to-be-promoted nodes, 9890 // and so we need to replace those as we do the promotions. 9891 if (isa<ConstantSDNode>(Inputs[i])) 9892 continue; 9893 else 9894 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 9895 } 9896 9897 std::list<HandleSDNode> PromOpHandles; 9898 for (auto &PromOp : PromOps) 9899 PromOpHandles.emplace_back(PromOp); 9900 9901 // Replace all operations (these are all the same, but have a different 9902 // (i1) return type). DAG.getNode will validate that the types of 9903 // a binary operator match, so go through the list in reverse so that 9904 // we've likely promoted both operands first. Any intermediate truncations or 9905 // extensions disappear. 9906 while (!PromOpHandles.empty()) { 9907 SDValue PromOp = PromOpHandles.back().getValue(); 9908 PromOpHandles.pop_back(); 9909 9910 if (PromOp.getOpcode() == ISD::TRUNCATE || 9911 PromOp.getOpcode() == ISD::SIGN_EXTEND || 9912 PromOp.getOpcode() == ISD::ZERO_EXTEND || 9913 PromOp.getOpcode() == ISD::ANY_EXTEND) { 9914 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 9915 PromOp.getOperand(0).getValueType() != MVT::i1) { 9916 // The operand is not yet ready (see comment below). 9917 PromOpHandles.emplace_front(PromOp); 9918 continue; 9919 } 9920 9921 SDValue RepValue = PromOp.getOperand(0); 9922 if (isa<ConstantSDNode>(RepValue)) 9923 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 9924 9925 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 9926 continue; 9927 } 9928 9929 unsigned C; 9930 switch (PromOp.getOpcode()) { 9931 default: C = 0; break; 9932 case ISD::SELECT: C = 1; break; 9933 case ISD::SELECT_CC: C = 2; break; 9934 } 9935 9936 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 9937 PromOp.getOperand(C).getValueType() != MVT::i1) || 9938 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 9939 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 9940 // The to-be-promoted operands of this node have not yet been 9941 // promoted (this should be rare because we're going through the 9942 // list backward, but if one of the operands has several users in 9943 // this cluster of to-be-promoted nodes, it is possible). 9944 PromOpHandles.emplace_front(PromOp); 9945 continue; 9946 } 9947 9948 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 9949 PromOp.getNode()->op_end()); 9950 9951 // If there are any constant inputs, make sure they're replaced now. 9952 for (unsigned i = 0; i < 2; ++i) 9953 if (isa<ConstantSDNode>(Ops[C+i])) 9954 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 9955 9956 DAG.ReplaceAllUsesOfValueWith(PromOp, 9957 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 9958 } 9959 9960 // Now we're left with the initial truncation itself. 9961 if (N->getOpcode() == ISD::TRUNCATE) 9962 return N->getOperand(0); 9963 9964 // Otherwise, this is a comparison. The operands to be compared have just 9965 // changed type (to i1), but everything else is the same. 9966 return SDValue(N, 0); 9967 } 9968 9969 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 9970 DAGCombinerInfo &DCI) const { 9971 SelectionDAG &DAG = DCI.DAG; 9972 SDLoc dl(N); 9973 9974 // If we're tracking CR bits, we need to be careful that we don't have: 9975 // zext(binary-ops(trunc(x), trunc(y))) 9976 // or 9977 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 
9978 // such that we're unnecessarily moving things into CR bits that can more 9979 // efficiently stay in GPRs. Note that if we're not certain that the high 9980 // bits are set as required by the final extension, we still may need to do 9981 // some masking to get the proper behavior. 9982 9983 // This same functionality is important on PPC64 when dealing with 9984 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 9985 // the return values of functions. Because it is so similar, it is handled 9986 // here as well. 9987 9988 if (N->getValueType(0) != MVT::i32 && 9989 N->getValueType(0) != MVT::i64) 9990 return SDValue(); 9991 9992 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || 9993 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) 9994 return SDValue(); 9995 9996 if (N->getOperand(0).getOpcode() != ISD::AND && 9997 N->getOperand(0).getOpcode() != ISD::OR && 9998 N->getOperand(0).getOpcode() != ISD::XOR && 9999 N->getOperand(0).getOpcode() != ISD::SELECT && 10000 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 10001 return SDValue(); 10002 10003 SmallVector<SDValue, 4> Inputs; 10004 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 10005 SmallPtrSet<SDNode *, 16> Visited; 10006 10007 // Visit all inputs, collect all binary operations (and, or, xor and 10008 // select) that are all fed by truncations. 10009 while (!BinOps.empty()) { 10010 SDValue BinOp = BinOps.back(); 10011 BinOps.pop_back(); 10012 10013 if (!Visited.insert(BinOp.getNode()).second) 10014 continue; 10015 10016 PromOps.push_back(BinOp); 10017 10018 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 10019 // The condition of the select is not promoted. 10020 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 10021 continue; 10022 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 10023 continue; 10024 10025 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 10026 isa<ConstantSDNode>(BinOp.getOperand(i))) { 10027 Inputs.push_back(BinOp.getOperand(i)); 10028 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 10029 BinOp.getOperand(i).getOpcode() == ISD::OR || 10030 BinOp.getOperand(i).getOpcode() == ISD::XOR || 10031 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 10032 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 10033 BinOps.push_back(BinOp.getOperand(i)); 10034 } else { 10035 // We have an input that is not a truncation or another binary 10036 // operation; we'll abort this transformation. 10037 return SDValue(); 10038 } 10039 } 10040 } 10041 10042 // The operands of a select that must be truncated when the select is 10043 // promoted because the operand is actually part of the to-be-promoted set. 10044 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 10045 10046 // Make sure that this is a self-contained cluster of operations (which 10047 // is not quite the same thing as saying that everything has only one 10048 // use). 10049 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10050 if (isa<ConstantSDNode>(Inputs[i])) 10051 continue; 10052 10053 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 10054 UE = Inputs[i].getNode()->use_end(); 10055 UI != UE; ++UI) { 10056 SDNode *User = *UI; 10057 if (User != N && !Visited.count(User)) 10058 return SDValue(); 10059 10060 // If we're going to promote the non-output-value operand(s) or SELECT or 10061 // SELECT_CC, record them for truncation. 
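      // (The condition operand of a SELECT, and the two comparison operands
      // of a SELECT_CC, keep their original type even when the selected
      // values are promoted; SelectTruncOp[] remembers those types so the
      // operands can be truncated back when the node is rebuilt below.)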
10062 if (User->getOpcode() == ISD::SELECT) { 10063 if (User->getOperand(0) == Inputs[i]) 10064 SelectTruncOp[0].insert(std::make_pair(User, 10065 User->getOperand(0).getValueType())); 10066 } else if (User->getOpcode() == ISD::SELECT_CC) { 10067 if (User->getOperand(0) == Inputs[i]) 10068 SelectTruncOp[0].insert(std::make_pair(User, 10069 User->getOperand(0).getValueType())); 10070 if (User->getOperand(1) == Inputs[i]) 10071 SelectTruncOp[1].insert(std::make_pair(User, 10072 User->getOperand(1).getValueType())); 10073 } 10074 } 10075 } 10076 10077 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 10078 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 10079 UE = PromOps[i].getNode()->use_end(); 10080 UI != UE; ++UI) { 10081 SDNode *User = *UI; 10082 if (User != N && !Visited.count(User)) 10083 return SDValue(); 10084 10085 // If we're going to promote the non-output-value operand(s) or SELECT or 10086 // SELECT_CC, record them for truncation. 10087 if (User->getOpcode() == ISD::SELECT) { 10088 if (User->getOperand(0) == PromOps[i]) 10089 SelectTruncOp[0].insert(std::make_pair(User, 10090 User->getOperand(0).getValueType())); 10091 } else if (User->getOpcode() == ISD::SELECT_CC) { 10092 if (User->getOperand(0) == PromOps[i]) 10093 SelectTruncOp[0].insert(std::make_pair(User, 10094 User->getOperand(0).getValueType())); 10095 if (User->getOperand(1) == PromOps[i]) 10096 SelectTruncOp[1].insert(std::make_pair(User, 10097 User->getOperand(1).getValueType())); 10098 } 10099 } 10100 } 10101 10102 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 10103 bool ReallyNeedsExt = false; 10104 if (N->getOpcode() != ISD::ANY_EXTEND) { 10105 // If all of the inputs are not already sign/zero extended, then 10106 // we'll still need to do that at the end. 10107 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10108 if (isa<ConstantSDNode>(Inputs[i])) 10109 continue; 10110 10111 unsigned OpBits = 10112 Inputs[i].getOperand(0).getValueSizeInBits(); 10113 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 10114 10115 if ((N->getOpcode() == ISD::ZERO_EXTEND && 10116 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 10117 APInt::getHighBitsSet(OpBits, 10118 OpBits-PromBits))) || 10119 (N->getOpcode() == ISD::SIGN_EXTEND && 10120 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 10121 (OpBits-(PromBits-1)))) { 10122 ReallyNeedsExt = true; 10123 break; 10124 } 10125 } 10126 } 10127 10128 // Replace all inputs, either with the truncation operand, or a 10129 // truncation or extension to the final output type. 10130 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10131 // Constant inputs need to be replaced with the to-be-promoted nodes that 10132 // use them because they might have users outside of the cluster of 10133 // promoted nodes. 
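    // (They are therefore skipped here and instead extended or truncated in
    // place when the nodes that use them are rebuilt further down.)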
10134 if (isa<ConstantSDNode>(Inputs[i])) 10135 continue; 10136 10137 SDValue InSrc = Inputs[i].getOperand(0); 10138 if (Inputs[i].getValueType() == N->getValueType(0)) 10139 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 10140 else if (N->getOpcode() == ISD::SIGN_EXTEND) 10141 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10142 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 10143 else if (N->getOpcode() == ISD::ZERO_EXTEND) 10144 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10145 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 10146 else 10147 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10148 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 10149 } 10150 10151 std::list<HandleSDNode> PromOpHandles; 10152 for (auto &PromOp : PromOps) 10153 PromOpHandles.emplace_back(PromOp); 10154 10155 // Replace all operations (these are all the same, but have a different 10156 // (promoted) return type). DAG.getNode will validate that the types of 10157 // a binary operator match, so go through the list in reverse so that 10158 // we've likely promoted both operands first. 10159 while (!PromOpHandles.empty()) { 10160 SDValue PromOp = PromOpHandles.back().getValue(); 10161 PromOpHandles.pop_back(); 10162 10163 unsigned C; 10164 switch (PromOp.getOpcode()) { 10165 default: C = 0; break; 10166 case ISD::SELECT: C = 1; break; 10167 case ISD::SELECT_CC: C = 2; break; 10168 } 10169 10170 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 10171 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 10172 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 10173 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 10174 // The to-be-promoted operands of this node have not yet been 10175 // promoted (this should be rare because we're going through the 10176 // list backward, but if one of the operands has several users in 10177 // this cluster of to-be-promoted nodes, it is possible). 10178 PromOpHandles.emplace_front(PromOp); 10179 continue; 10180 } 10181 10182 // For SELECT and SELECT_CC nodes, we do a similar check for any 10183 // to-be-promoted comparison inputs. 10184 if (PromOp.getOpcode() == ISD::SELECT || 10185 PromOp.getOpcode() == ISD::SELECT_CC) { 10186 if ((SelectTruncOp[0].count(PromOp.getNode()) && 10187 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 10188 (SelectTruncOp[1].count(PromOp.getNode()) && 10189 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 10190 PromOpHandles.emplace_front(PromOp); 10191 continue; 10192 } 10193 } 10194 10195 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 10196 PromOp.getNode()->op_end()); 10197 10198 // If this node has constant inputs, then they'll need to be promoted here. 10199 for (unsigned i = 0; i < 2; ++i) { 10200 if (!isa<ConstantSDNode>(Ops[C+i])) 10201 continue; 10202 if (Ops[C+i].getValueType() == N->getValueType(0)) 10203 continue; 10204 10205 if (N->getOpcode() == ISD::SIGN_EXTEND) 10206 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10207 else if (N->getOpcode() == ISD::ZERO_EXTEND) 10208 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10209 else 10210 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10211 } 10212 10213 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 10214 // truncate them again to the original value type. 
10215 if (PromOp.getOpcode() == ISD::SELECT || 10216 PromOp.getOpcode() == ISD::SELECT_CC) { 10217 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 10218 if (SI0 != SelectTruncOp[0].end()) 10219 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 10220 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 10221 if (SI1 != SelectTruncOp[1].end()) 10222 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 10223 } 10224 10225 DAG.ReplaceAllUsesOfValueWith(PromOp, 10226 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 10227 } 10228 10229 // Now we're left with the initial extension itself. 10230 if (!ReallyNeedsExt) 10231 return N->getOperand(0); 10232 10233 // To zero extend, just mask off everything except for the first bit (in the 10234 // i1 case). 10235 if (N->getOpcode() == ISD::ZERO_EXTEND) 10236 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), 10237 DAG.getConstant(APInt::getLowBitsSet( 10238 N->getValueSizeInBits(0), PromBits), 10239 dl, N->getValueType(0))); 10240 10241 assert(N->getOpcode() == ISD::SIGN_EXTEND && 10242 "Invalid extension type"); 10243 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); 10244 SDValue ShiftCst = 10245 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); 10246 return DAG.getNode( 10247 ISD::SRA, dl, N->getValueType(0), 10248 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), 10249 ShiftCst); 10250 } 10251 10252 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, 10253 DAGCombinerInfo &DCI) const { 10254 assert(N->getOpcode() == ISD::BUILD_VECTOR && 10255 "Should be called with a BUILD_VECTOR node"); 10256 10257 SelectionDAG &DAG = DCI.DAG; 10258 SDLoc dl(N); 10259 if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX()) 10260 return SDValue(); 10261 10262 // Looking for: 10263 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) 10264 if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP && 10265 N->getOperand(0).getOpcode() != ISD::UINT_TO_FP) 10266 return SDValue(); 10267 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && 10268 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) 10269 return SDValue(); 10270 if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode()) 10271 return SDValue(); 10272 10273 SDValue Ext1 = N->getOperand(0).getOperand(0); 10274 SDValue Ext2 = N->getOperand(1).getOperand(0); 10275 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 10276 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 10277 return SDValue(); 10278 10279 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1)); 10280 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); 10281 if (!Ext1Op || !Ext2Op) 10282 return SDValue(); 10283 if (Ext1.getValueType() != MVT::i32 || 10284 Ext2.getValueType() != MVT::i32) 10285 if (Ext1.getOperand(0) != Ext2.getOperand(0)) 10286 return SDValue(); 10287 10288 int FirstElem = Ext1Op->getZExtValue(); 10289 int SecondElem = Ext2Op->getZExtValue(); 10290 int SubvecIdx; 10291 if (FirstElem == 0 && SecondElem == 1) 10292 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; 10293 else if (FirstElem == 2 && SecondElem == 3) 10294 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; 10295 else 10296 return SDValue(); 10297 10298 SDValue SrcVec = Ext1.getOperand(0); 10299 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? 
10300 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; 10301 return DAG.getNode(NodeType, dl, MVT::v2f64, 10302 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); 10303 } 10304 10305 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, 10306 DAGCombinerInfo &DCI) const { 10307 assert((N->getOpcode() == ISD::SINT_TO_FP || 10308 N->getOpcode() == ISD::UINT_TO_FP) && 10309 "Need an int -> FP conversion node here"); 10310 10311 if (!Subtarget.has64BitSupport()) 10312 return SDValue(); 10313 10314 SelectionDAG &DAG = DCI.DAG; 10315 SDLoc dl(N); 10316 SDValue Op(N, 0); 10317 10318 // Don't handle ppc_fp128 here or i1 conversions. 10319 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 10320 return SDValue(); 10321 if (Op.getOperand(0).getValueType() == MVT::i1) 10322 return SDValue(); 10323 10324 // For i32 intermediate values, unfortunately, the conversion functions 10325 // leave the upper 32 bits of the value undefined. Within the set of 10326 // scalar instructions, we have no method for zero- or sign-extending the 10327 // value. Thus, we cannot handle i32 intermediate values here. 10328 if (Op.getOperand(0).getValueType() == MVT::i32) 10329 return SDValue(); 10330 10331 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 10332 "UINT_TO_FP is supported only with FPCVT"); 10333 10334 // If we have FCFIDS, then use it when converting to single-precision. 10335 // Otherwise, convert to double-precision and then round. 10336 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10337 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 10338 : PPCISD::FCFIDS) 10339 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 10340 : PPCISD::FCFID); 10341 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10342 ? MVT::f32 10343 : MVT::f64; 10344 10345 // If we're converting from a float to an int and back to a float again, 10346 // then we don't need the store/load pair at all. 10347 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && 10348 Subtarget.hasFPCVT()) || 10349 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { 10350 SDValue Src = Op.getOperand(0).getOperand(0); 10351 if (Src.getValueType() == MVT::f32) { 10352 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 10353 DCI.AddToWorklist(Src.getNode()); 10354 } else if (Src.getValueType() != MVT::f64) { 10355 // Make sure that we don't pick up a ppc_fp128 source value. 10356 return SDValue(); 10357 } 10358 10359 unsigned FCTOp = 10360 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 10361 PPCISD::FCTIDUZ; 10362 10363 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); 10364 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); 10365 10366 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { 10367 FP = DAG.getNode(ISD::FP_ROUND, dl, 10368 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 10369 DCI.AddToWorklist(FP.getNode()); 10370 } 10371 10372 return FP; 10373 } 10374 10375 return SDValue(); 10376 } 10377 10378 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for 10379 // builtins) into loads with swaps.
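// For example (sketch only): on a little-endian subtarget a v4i32 VSX load is
// rewritten roughly as
//   (v4i32 (bitcast (xxswapd (v2f64 (lxvd2x addr)))))
// i.e. the data is loaded as v2f64 via PPCISD::LXVD2X, element-swapped with
// PPCISD::XXSWAPD, and bitcast back to the requested type; the swap's chain
// result stands in for the original load's chain.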
10380 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, 10381 DAGCombinerInfo &DCI) const { 10382 SelectionDAG &DAG = DCI.DAG; 10383 SDLoc dl(N); 10384 SDValue Chain; 10385 SDValue Base; 10386 MachineMemOperand *MMO; 10387 10388 switch (N->getOpcode()) { 10389 default: 10390 llvm_unreachable("Unexpected opcode for little endian VSX load"); 10391 case ISD::LOAD: { 10392 LoadSDNode *LD = cast<LoadSDNode>(N); 10393 Chain = LD->getChain(); 10394 Base = LD->getBasePtr(); 10395 MMO = LD->getMemOperand(); 10396 // If the MMO suggests this isn't a load of a full vector, leave 10397 // things alone. For a built-in, we have to make the change for 10398 // correctness, so if there is a size problem that will be a bug. 10399 if (MMO->getSize() < 16) 10400 return SDValue(); 10401 break; 10402 } 10403 case ISD::INTRINSIC_W_CHAIN: { 10404 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 10405 Chain = Intrin->getChain(); 10406 // Similarly to the store case below, Intrin->getBasePtr() doesn't get 10407 // us what we want. Get operand 2 instead. 10408 Base = Intrin->getOperand(2); 10409 MMO = Intrin->getMemOperand(); 10410 break; 10411 } 10412 } 10413 10414 MVT VecTy = N->getValueType(0).getSimpleVT(); 10415 SDValue LoadOps[] = { Chain, Base }; 10416 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 10417 DAG.getVTList(MVT::v2f64, MVT::Other), 10418 LoadOps, MVT::v2f64, MMO); 10419 10420 DCI.AddToWorklist(Load.getNode()); 10421 Chain = Load.getValue(1); 10422 SDValue Swap = DAG.getNode( 10423 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); 10424 DCI.AddToWorklist(Swap.getNode()); 10425 10426 // Add a bitcast if the resulting load type doesn't match v2f64. 10427 if (VecTy != MVT::v2f64) { 10428 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); 10429 DCI.AddToWorklist(N.getNode()); 10430 // Package {bitcast value, swap's chain} to match Load's shape. 10431 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), 10432 N, Swap.getValue(1)); 10433 } 10434 10435 return Swap; 10436 } 10437 10438 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 10439 // builtins) into stores with swaps. 10440 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 10441 DAGCombinerInfo &DCI) const { 10442 SelectionDAG &DAG = DCI.DAG; 10443 SDLoc dl(N); 10444 SDValue Chain; 10445 SDValue Base; 10446 unsigned SrcOpnd; 10447 MachineMemOperand *MMO; 10448 10449 switch (N->getOpcode()) { 10450 default: 10451 llvm_unreachable("Unexpected opcode for little endian VSX store"); 10452 case ISD::STORE: { 10453 StoreSDNode *ST = cast<StoreSDNode>(N); 10454 Chain = ST->getChain(); 10455 Base = ST->getBasePtr(); 10456 MMO = ST->getMemOperand(); 10457 SrcOpnd = 1; 10458 // If the MMO suggests this isn't a store of a full vector, leave 10459 // things alone. For a built-in, we have to make the change for 10460 // correctness, so if there is a size problem that will be a bug. 10461 if (MMO->getSize() < 16) 10462 return SDValue(); 10463 break; 10464 } 10465 case ISD::INTRINSIC_VOID: { 10466 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 10467 Chain = Intrin->getChain(); 10468 // Intrin->getBasePtr() oddly does not get what we want. 10469 Base = Intrin->getOperand(3); 10470 MMO = Intrin->getMemOperand(); 10471 SrcOpnd = 2; 10472 break; 10473 } 10474 } 10475 10476 SDValue Src = N->getOperand(SrcOpnd); 10477 MVT VecTy = Src.getValueType().getSimpleVT(); 10478 10479 // All stores are done as v2f64 and possible bit cast. 
10480 if (VecTy != MVT::v2f64) { 10481 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); 10482 DCI.AddToWorklist(Src.getNode()); 10483 } 10484 10485 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 10486 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); 10487 DCI.AddToWorklist(Swap.getNode()); 10488 Chain = Swap.getValue(1); 10489 SDValue StoreOps[] = { Chain, Swap, Base }; 10490 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 10491 DAG.getVTList(MVT::Other), 10492 StoreOps, VecTy, MMO); 10493 DCI.AddToWorklist(Store.getNode()); 10494 return Store; 10495 } 10496 10497 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 10498 DAGCombinerInfo &DCI) const { 10499 SelectionDAG &DAG = DCI.DAG; 10500 SDLoc dl(N); 10501 switch (N->getOpcode()) { 10502 default: break; 10503 case PPCISD::SHL: 10504 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. 10505 return N->getOperand(0); 10506 break; 10507 case PPCISD::SRL: 10508 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. 10509 return N->getOperand(0); 10510 break; 10511 case PPCISD::SRA: 10512 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 10513 if (C->isNullValue() || // 0 >>s V -> 0. 10514 C->isAllOnesValue()) // -1 >>s V -> -1. 10515 return N->getOperand(0); 10516 } 10517 break; 10518 case ISD::SIGN_EXTEND: 10519 case ISD::ZERO_EXTEND: 10520 case ISD::ANY_EXTEND: 10521 return DAGCombineExtBoolTrunc(N, DCI); 10522 case ISD::TRUNCATE: 10523 case ISD::SETCC: 10524 case ISD::SELECT_CC: 10525 return DAGCombineTruncBoolExt(N, DCI); 10526 case ISD::SINT_TO_FP: 10527 case ISD::UINT_TO_FP: 10528 return combineFPToIntToFP(N, DCI); 10529 case ISD::STORE: { 10530 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 10531 if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() && 10532 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 10533 N->getOperand(1).getValueType() == MVT::i32 && 10534 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 10535 SDValue Val = N->getOperand(1).getOperand(0); 10536 if (Val.getValueType() == MVT::f32) { 10537 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 10538 DCI.AddToWorklist(Val.getNode()); 10539 } 10540 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 10541 DCI.AddToWorklist(Val.getNode()); 10542 10543 SDValue Ops[] = { 10544 N->getOperand(0), Val, N->getOperand(2), 10545 DAG.getValueType(N->getOperand(1).getValueType()) 10546 }; 10547 10548 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 10549 DAG.getVTList(MVT::Other), Ops, 10550 cast<StoreSDNode>(N)->getMemoryVT(), 10551 cast<StoreSDNode>(N)->getMemOperand()); 10552 DCI.AddToWorklist(Val.getNode()); 10553 return Val; 10554 } 10555 10556 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 10557 if (cast<StoreSDNode>(N)->isUnindexed() && 10558 N->getOperand(1).getOpcode() == ISD::BSWAP && 10559 N->getOperand(1).getNode()->hasOneUse() && 10560 (N->getOperand(1).getValueType() == MVT::i32 || 10561 N->getOperand(1).getValueType() == MVT::i16 || 10562 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 10563 N->getOperand(1).getValueType() == MVT::i64))) { 10564 SDValue BSwapOp = N->getOperand(1).getOperand(0); 10565 // Do an any-extend to 32-bits if this is a half-word input. 
10566 if (BSwapOp.getValueType() == MVT::i16) 10567 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 10568 10569 SDValue Ops[] = { 10570 N->getOperand(0), BSwapOp, N->getOperand(2), 10571 DAG.getValueType(N->getOperand(1).getValueType()) 10572 }; 10573 return 10574 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 10575 Ops, cast<StoreSDNode>(N)->getMemoryVT(), 10576 cast<StoreSDNode>(N)->getMemOperand()); 10577 } 10578 10579 // For little endian, VSX stores require generating xxswapd/stxvd2x. 10580 EVT VT = N->getOperand(1).getValueType(); 10581 if (VT.isSimple()) { 10582 MVT StoreVT = VT.getSimpleVT(); 10583 if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && 10584 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || 10585 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) 10586 return expandVSXStoreForLE(N, DCI); 10587 } 10588 break; 10589 } 10590 case ISD::LOAD: { 10591 LoadSDNode *LD = cast<LoadSDNode>(N); 10592 EVT VT = LD->getValueType(0); 10593 10594 // For little endian, VSX loads require generating lxvd2x/xxswapd. 10595 if (VT.isSimple()) { 10596 MVT LoadVT = VT.getSimpleVT(); 10597 if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && 10598 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || 10599 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) 10600 return expandVSXLoadForLE(N, DCI); 10601 } 10602 10603 // We sometimes end up with a 64-bit integer load, from which we extract 10604 // two single-precision floating-point numbers. This happens with 10605 // std::complex<float>, and other similar structures, because of the way we 10606 // canonicalize structure copies. However, if we lack direct moves, 10607 // then the final bitcasts from the extracted integer values to the 10608 // floating-point numbers turn into store/load pairs. Even with direct moves, 10609 // just loading the two floating-point numbers is likely better.
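// A hypothetical source-level trigger (illustrative only, not taken from a
// real test case):
//   std::complex<float> c = other;   // often canonicalized to one i64 copy
// can reach us as a single i64 load whose two 32-bit halves are
// shifted/truncated and then bitcast to f32; the ReplaceTwoFloatLoad lambda
// below rewrites that into two direct f32 loads.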
10610 auto ReplaceTwoFloatLoad = [&]() { 10611 if (VT != MVT::i64) 10612 return false; 10613 10614 if (LD->getExtensionType() != ISD::NON_EXTLOAD || 10615 LD->isVolatile()) 10616 return false; 10617 10618 // We're looking for a sequence like this: 10619 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 10620 // t16: i64 = srl t13, Constant:i32<32> 10621 // t17: i32 = truncate t16 10622 // t18: f32 = bitcast t17 10623 // t19: i32 = truncate t13 10624 // t20: f32 = bitcast t19 10625 10626 if (!LD->hasNUsesOfValue(2, 0)) 10627 return false; 10628 10629 auto UI = LD->use_begin(); 10630 while (UI.getUse().getResNo() != 0) ++UI; 10631 SDNode *Trunc = *UI++; 10632 while (UI.getUse().getResNo() != 0) ++UI; 10633 SDNode *RightShift = *UI; 10634 if (Trunc->getOpcode() != ISD::TRUNCATE) 10635 std::swap(Trunc, RightShift); 10636 10637 if (Trunc->getOpcode() != ISD::TRUNCATE || 10638 Trunc->getValueType(0) != MVT::i32 || 10639 !Trunc->hasOneUse()) 10640 return false; 10641 if (RightShift->getOpcode() != ISD::SRL || 10642 !isa<ConstantSDNode>(RightShift->getOperand(1)) || 10643 RightShift->getConstantOperandVal(1) != 32 || 10644 !RightShift->hasOneUse()) 10645 return false; 10646 10647 SDNode *Trunc2 = *RightShift->use_begin(); 10648 if (Trunc2->getOpcode() != ISD::TRUNCATE || 10649 Trunc2->getValueType(0) != MVT::i32 || 10650 !Trunc2->hasOneUse()) 10651 return false; 10652 10653 SDNode *Bitcast = *Trunc->use_begin(); 10654 SDNode *Bitcast2 = *Trunc2->use_begin(); 10655 10656 if (Bitcast->getOpcode() != ISD::BITCAST || 10657 Bitcast->getValueType(0) != MVT::f32) 10658 return false; 10659 if (Bitcast2->getOpcode() != ISD::BITCAST || 10660 Bitcast2->getValueType(0) != MVT::f32) 10661 return false; 10662 10663 if (Subtarget.isLittleEndian()) 10664 std::swap(Bitcast, Bitcast2); 10665 10666 // Bitcast has the second float (in memory-layout order) and Bitcast2 10667 // has the first one. 10668 10669 SDValue BasePtr = LD->getBasePtr(); 10670 if (LD->isIndexed()) { 10671 assert(LD->getAddressingMode() == ISD::PRE_INC && 10672 "Non-pre-inc AM on PPC?"); 10673 BasePtr = 10674 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 10675 LD->getOffset()); 10676 } 10677 10678 auto MMOFlags = 10679 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; 10680 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, 10681 LD->getPointerInfo(), LD->getAlignment(), 10682 MMOFlags, LD->getAAInfo()); 10683 SDValue AddPtr = 10684 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), 10685 BasePtr, DAG.getIntPtrConstant(4, dl)); 10686 SDValue FloatLoad2 = DAG.getLoad( 10687 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, 10688 LD->getPointerInfo().getWithOffset(4), 10689 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); 10690 10691 if (LD->isIndexed()) { 10692 // Note that DAGCombine should re-form any pre-increment load(s) from 10693 // what is produced here if that makes sense. 10694 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); 10695 } 10696 10697 DCI.CombineTo(Bitcast2, FloatLoad); 10698 DCI.CombineTo(Bitcast, FloatLoad2); 10699 10700 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 
2 : 1), 10701 SDValue(FloatLoad2.getNode(), 1)); 10702 return true; 10703 }; 10704 10705 if (ReplaceTwoFloatLoad()) 10706 return SDValue(N, 0); 10707 10708 EVT MemVT = LD->getMemoryVT(); 10709 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); 10710 unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); 10711 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); 10712 unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); 10713 if (LD->isUnindexed() && VT.isVector() && 10714 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && 10715 // P8 and later hardware should just use LOAD. 10716 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || 10717 VT == MVT::v4i32 || VT == MVT::v4f32)) || 10718 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && 10719 LD->getAlignment() >= ScalarABIAlignment)) && 10720 LD->getAlignment() < ABIAlignment) { 10721 // This is a type-legal unaligned Altivec or QPX load. 10722 SDValue Chain = LD->getChain(); 10723 SDValue Ptr = LD->getBasePtr(); 10724 bool isLittleEndian = Subtarget.isLittleEndian(); 10725 10726 // This implements the loading of unaligned vectors as described in 10727 // the venerable Apple Velocity Engine overview. Specifically: 10728 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 10729 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 10730 // 10731 // The general idea is to expand a sequence of one or more unaligned 10732 // loads into an alignment-based permutation-control instruction (lvsl 10733 // or lvsr), a series of regular vector loads (which always truncate 10734 // their input address to an aligned address), and a series of 10735 // permutations. The results of these permutations are the requested 10736 // loaded values. The trick is that the last "extra" load is not taken 10737 // from the address you might suspect (sizeof(vector) bytes after the 10738 // last requested load), but rather sizeof(vector) - 1 bytes after the 10739 // last requested vector. The point of this is to avoid a page fault if 10740 // the base address happened to be aligned. This works because if the 10741 // base address is aligned, then adding less than a full vector length 10742 // will cause the last vector in the sequence to be (re)loaded. 10743 // Otherwise, the next vector will be fetched as you might suspect was 10744 // necessary. 10745 10746 // We might be able to reuse the permutation generation from 10747 // a different base address offset from this one by an aligned amount. 10748 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 10749 // optimization later. 10750 Intrinsic::ID Intr, IntrLD, IntrPerm; 10751 MVT PermCntlTy, PermTy, LDTy; 10752 if (Subtarget.hasAltivec()) { 10753 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : 10754 Intrinsic::ppc_altivec_lvsl; 10755 IntrLD = Intrinsic::ppc_altivec_lvx; 10756 IntrPerm = Intrinsic::ppc_altivec_vperm; 10757 PermCntlTy = MVT::v16i8; 10758 PermTy = MVT::v4i32; 10759 LDTy = MVT::v4i32; 10760 } else { 10761 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : 10762 Intrinsic::ppc_qpx_qvlpcls; 10763 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : 10764 Intrinsic::ppc_qpx_qvlfs; 10765 IntrPerm = Intrinsic::ppc_qpx_qvfperm; 10766 PermCntlTy = MVT::v4f64; 10767 PermTy = MVT::v4f64; 10768 LDTy = MemVT.getSimpleVT(); 10769 } 10770 10771 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); 10772 10773 // Create the new MMO for the new base load. 
It is like the original MMO, 10774 // but represents an area in memory almost twice the vector size centered 10775 // on the original address. If the address is unaligned, we might start 10776 // reading up to (sizeof(vector)-1) bytes below the address of the 10777 // original unaligned load. 10778 MachineFunction &MF = DAG.getMachineFunction(); 10779 MachineMemOperand *BaseMMO = 10780 MF.getMachineMemOperand(LD->getMemOperand(), 10781 -(long)MemVT.getStoreSize()+1, 10782 2*MemVT.getStoreSize()-1); 10783 10784 // Create the new base load. 10785 SDValue LDXIntID = 10786 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); 10787 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; 10788 SDValue BaseLoad = 10789 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 10790 DAG.getVTList(PermTy, MVT::Other), 10791 BaseLoadOps, LDTy, BaseMMO); 10792 10793 // Note that the value of IncOffset (which is provided to the next 10794 // load's pointer info offset value, and thus used to calculate the 10795 // alignment), and the value of IncValue (which is actually used to 10796 // increment the pointer value) are different! This is because we 10797 // require the next load to appear to be aligned, even though it 10798 // is actually offset from the base pointer by a lesser amount. 10799 int IncOffset = VT.getSizeInBits() / 8; 10800 int IncValue = IncOffset; 10801 10802 // Walk (both up and down) the chain looking for another load at the real 10803 // (aligned) offset (the alignment of the other load does not matter in 10804 // this case). If found, then do not use the offset reduction trick, as 10805 // that will prevent the loads from being later combined (as they would 10806 // otherwise be duplicates). 10807 if (!findConsecutiveLoad(LD, DAG)) 10808 --IncValue; 10809 10810 SDValue Increment = 10811 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); 10812 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 10813 10814 MachineMemOperand *ExtraMMO = 10815 MF.getMachineMemOperand(LD->getMemOperand(), 10816 1, 2*MemVT.getStoreSize()-1); 10817 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; 10818 SDValue ExtraLoad = 10819 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 10820 DAG.getVTList(PermTy, MVT::Other), 10821 ExtraLoadOps, LDTy, ExtraMMO); 10822 10823 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 10824 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 10825 10826 // Because vperm has a big-endian bias, we must reverse the order 10827 // of the input vectors and complement the permute control vector 10828 // when generating little endian code. We have already handled the 10829 // latter by using lvsr instead of lvsl, so just reverse BaseLoad 10830 // and ExtraLoad here. 10831 SDValue Perm; 10832 if (isLittleEndian) 10833 Perm = BuildIntrinsicOp(IntrPerm, 10834 ExtraLoad, BaseLoad, PermCntl, DAG, dl); 10835 else 10836 Perm = BuildIntrinsicOp(IntrPerm, 10837 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 10838 10839 if (VT != PermTy) 10840 Perm = Subtarget.hasAltivec() ? 10841 DAG.getNode(ISD::BITCAST, dl, VT, Perm) : 10842 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX 10843 DAG.getTargetConstant(1, dl, MVT::i64)); 10844 // second argument is 1 because this rounding 10845 // is always exact. 10846 10847 // The output of the permutation is our loaded result, the TokenFactor is 10848 // our new chain. 
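// Roughly, for a misaligned Altivec load on a big-endian target the emitted
// sequence is (sketch only):
//   PermCntl  = lvsl(Ptr)
//   BaseLoad  = lvx(Ptr)
//   ExtraLoad = lvx(Ptr + 15)    // or Ptr + 16 if a consecutive load exists
//   Result    = vperm(BaseLoad, ExtraLoad, PermCntl)
// On little-endian targets lvsr is used instead and the two vperm data
// operands are swapped, as done above; the QPX path is analogous with
// qvlpcld/qvlpcls, qvlfd/qvlfs, and qvfperm.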
10849 DCI.CombineTo(N, Perm, TF); 10850 return SDValue(N, 0); 10851 } 10852 } 10853 break; 10854 case ISD::INTRINSIC_WO_CHAIN: { 10855 bool isLittleEndian = Subtarget.isLittleEndian(); 10856 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 10857 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr 10858 : Intrinsic::ppc_altivec_lvsl); 10859 if ((IID == Intr || 10860 IID == Intrinsic::ppc_qpx_qvlpcld || 10861 IID == Intrinsic::ppc_qpx_qvlpcls) && 10862 N->getOperand(1)->getOpcode() == ISD::ADD) { 10863 SDValue Add = N->getOperand(1); 10864 10865 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 10866 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; 10867 10868 if (DAG.MaskedValueIsZero( 10869 Add->getOperand(1), 10870 APInt::getAllOnesValue(Bits /* alignment */) 10871 .zext( 10872 Add.getValueType().getScalarType().getSizeInBits()))) { 10873 SDNode *BasePtr = Add->getOperand(0).getNode(); 10874 for (SDNode::use_iterator UI = BasePtr->use_begin(), 10875 UE = BasePtr->use_end(); 10876 UI != UE; ++UI) { 10877 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 10878 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) { 10879 // We've found another LVSL/LVSR, and this address is an aligned 10880 // multiple of that one. The results will be the same, so use the 10881 // one we've just found instead. 10882 10883 return SDValue(*UI, 0); 10884 } 10885 } 10886 } 10887 10888 if (isa<ConstantSDNode>(Add->getOperand(1))) { 10889 SDNode *BasePtr = Add->getOperand(0).getNode(); 10890 for (SDNode::use_iterator UI = BasePtr->use_begin(), 10891 UE = BasePtr->use_end(); UI != UE; ++UI) { 10892 if (UI->getOpcode() == ISD::ADD && 10893 isa<ConstantSDNode>(UI->getOperand(1)) && 10894 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - 10895 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) % 10896 (1ULL << Bits) == 0) { 10897 SDNode *OtherAdd = *UI; 10898 for (SDNode::use_iterator VI = OtherAdd->use_begin(), 10899 VE = OtherAdd->use_end(); VI != VE; ++VI) { 10900 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 10901 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) { 10902 return SDValue(*VI, 0); 10903 } 10904 } 10905 } 10906 } 10907 } 10908 } 10909 } 10910 10911 break; 10912 case ISD::INTRINSIC_W_CHAIN: { 10913 // For little endian, VSX loads require generating lxvd2x/xxswapd. 10914 if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { 10915 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 10916 default: 10917 break; 10918 case Intrinsic::ppc_vsx_lxvw4x: 10919 case Intrinsic::ppc_vsx_lxvd2x: 10920 return expandVSXLoadForLE(N, DCI); 10921 } 10922 } 10923 break; 10924 } 10925 case ISD::INTRINSIC_VOID: { 10926 // For little endian, VSX stores require generating xxswapd/stxvd2x. 10927 if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { 10928 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 10929 default: 10930 break; 10931 case Intrinsic::ppc_vsx_stxvw4x: 10932 case Intrinsic::ppc_vsx_stxvd2x: 10933 return expandVSXStoreForLE(N, DCI); 10934 } 10935 } 10936 break; 10937 } 10938 case ISD::BSWAP: 10939 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 
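// e.g. (i32 (bswap (load p))) becomes a PPCISD::LBRX memory intrinsic that
// loads the value byte-reversed directly; an i16 result is produced as a
// 32-bit byte-reversed load followed by a truncate, as built below.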
10940 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 10941 N->getOperand(0).hasOneUse() && 10942 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 10943 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 10944 N->getValueType(0) == MVT::i64))) { 10945 SDValue Load = N->getOperand(0); 10946 LoadSDNode *LD = cast<LoadSDNode>(Load); 10947 // Create the byte-swapping load. 10948 SDValue Ops[] = { 10949 LD->getChain(), // Chain 10950 LD->getBasePtr(), // Ptr 10951 DAG.getValueType(N->getValueType(0)) // VT 10952 }; 10953 SDValue BSLoad = 10954 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 10955 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 10956 MVT::i64 : MVT::i32, MVT::Other), 10957 Ops, LD->getMemoryVT(), LD->getMemOperand()); 10958 10959 // If this is an i16 load, insert the truncate. 10960 SDValue ResVal = BSLoad; 10961 if (N->getValueType(0) == MVT::i16) 10962 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 10963 10964 // First, combine the bswap away. This makes the value produced by the 10965 // load dead. 10966 DCI.CombineTo(N, ResVal); 10967 10968 // Next, combine the load away, we give it a bogus result value but a real 10969 // chain result. The result value is dead because the bswap is dead. 10970 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 10971 10972 // Return N so it doesn't get rechecked! 10973 return SDValue(N, 0); 10974 } 10975 10976 break; 10977 case PPCISD::VCMP: { 10978 // If a VCMPo node already exists with exactly the same operands as this 10979 // node, use its result instead of this node (VCMPo computes both a CR6 and 10980 // a normal output). 10981 // 10982 if (!N->getOperand(0).hasOneUse() && 10983 !N->getOperand(1).hasOneUse() && 10984 !N->getOperand(2).hasOneUse()) { 10985 10986 // Scan all of the users of the LHS, looking for VCMPo's that match. 10987 SDNode *VCMPoNode = nullptr; 10988 10989 SDNode *LHSN = N->getOperand(0).getNode(); 10990 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 10991 UI != E; ++UI) 10992 if (UI->getOpcode() == PPCISD::VCMPo && 10993 UI->getOperand(1) == N->getOperand(1) && 10994 UI->getOperand(2) == N->getOperand(2) && 10995 UI->getOperand(0) == N->getOperand(0)) { 10996 VCMPoNode = *UI; 10997 break; 10998 } 10999 11000 // If there is no VCMPo node, or if the flag value has a single use, don't 11001 // transform this. 11002 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 11003 break; 11004 11005 // Look at the (necessarily single) use of the flag value. If it has a 11006 // chain, this transformation is more complex. Note that multiple things 11007 // could use the value result, which we should ignore. 11008 SDNode *FlagUser = nullptr; 11009 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 11010 FlagUser == nullptr; ++UI) { 11011 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 11012 SDNode *User = *UI; 11013 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 11014 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 11015 FlagUser = User; 11016 break; 11017 } 11018 } 11019 } 11020 11021 // If the user is a MFOCRF instruction, we know this is safe. 11022 // Otherwise we give up for right now. 
11023 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 11024 return SDValue(VCMPoNode, 0); 11025 } 11026 break; 11027 } 11028 case ISD::BRCOND: { 11029 SDValue Cond = N->getOperand(1); 11030 SDValue Target = N->getOperand(2); 11031 11032 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 11033 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 11034 Intrinsic::ppc_is_decremented_ctr_nonzero) { 11035 11036 // We now need to make the intrinsic dead (it cannot be instruction 11037 // selected). 11038 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 11039 assert(Cond.getNode()->hasOneUse() && 11040 "Counter decrement has more than one use"); 11041 11042 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 11043 N->getOperand(0), Target); 11044 } 11045 } 11046 break; 11047 case ISD::BR_CC: { 11048 // If this is a branch on an altivec predicate comparison, lower this so 11049 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 11050 // lowering is done pre-legalize, because the legalizer lowers the predicate 11051 // compare down to code that is difficult to reassemble. 11052 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 11053 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 11054 11055 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 11056 // value. If so, pass-through the AND to get to the intrinsic. 11057 if (LHS.getOpcode() == ISD::AND && 11058 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 11059 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 11060 Intrinsic::ppc_is_decremented_ctr_nonzero && 11061 isa<ConstantSDNode>(LHS.getOperand(1)) && 11062 !isNullConstant(LHS.getOperand(1))) 11063 LHS = LHS.getOperand(0); 11064 11065 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 11066 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 11067 Intrinsic::ppc_is_decremented_ctr_nonzero && 11068 isa<ConstantSDNode>(RHS)) { 11069 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 11070 "Counter decrement comparison is not EQ or NE"); 11071 11072 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 11073 bool isBDNZ = (CC == ISD::SETEQ && Val) || 11074 (CC == ISD::SETNE && !Val); 11075 11076 // We now need to make the intrinsic dead (it cannot be instruction 11077 // selected). 11078 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 11079 assert(LHS.getNode()->hasOneUse() && 11080 "Counter decrement has more than one use"); 11081 11082 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 11083 N->getOperand(0), N->getOperand(4)); 11084 } 11085 11086 int CompareOpc; 11087 bool isDot; 11088 11089 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11090 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 11091 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { 11092 assert(isDot && "Can't compare against a vector result!"); 11093 11094 // If this is a comparison against something other than 0/1, then we know 11095 // that the condition is never/always true. 11096 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 11097 if (Val != 0 && Val != 1) { 11098 if (CC == ISD::SETEQ) // Cond never true, remove branch. 11099 return N->getOperand(0); 11100 // Always !=, turn it into an unconditional branch. 
11101 return DAG.getNode(ISD::BR, dl, MVT::Other, 11102 N->getOperand(0), N->getOperand(4)); 11103 } 11104 11105 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 11106 11107 // Create the PPCISD altivec 'dot' comparison node. 11108 SDValue Ops[] = { 11109 LHS.getOperand(2), // LHS of compare 11110 LHS.getOperand(3), // RHS of compare 11111 DAG.getConstant(CompareOpc, dl, MVT::i32) 11112 }; 11113 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 11114 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 11115 11116 // Unpack the result based on how the target uses it. 11117 PPC::Predicate CompOpc; 11118 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 11119 default: // Can't happen, don't crash on invalid number though. 11120 case 0: // Branch on the value of the EQ bit of CR6. 11121 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 11122 break; 11123 case 1: // Branch on the inverted value of the EQ bit of CR6. 11124 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 11125 break; 11126 case 2: // Branch on the value of the LT bit of CR6. 11127 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 11128 break; 11129 case 3: // Branch on the inverted value of the LT bit of CR6. 11130 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; 11131 break; 11132 } 11133 11134 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 11135 DAG.getConstant(CompOpc, dl, MVT::i32), 11136 DAG.getRegister(PPC::CR6, MVT::i32), 11137 N->getOperand(4), CompNode.getValue(1)); 11138 } 11139 break; 11140 } 11141 case ISD::BUILD_VECTOR: 11142 return DAGCombineBuildVector(N, DCI); 11143 } 11144 11145 return SDValue(); 11146 } 11147 11148 SDValue 11149 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 11150 SelectionDAG &DAG, 11151 std::vector<SDNode *> *Created) const { 11152 // fold (sdiv X, pow2) 11153 EVT VT = N->getValueType(0); 11154 if (VT == MVT::i64 && !Subtarget.isPPC64()) 11155 return SDValue(); 11156 if ((VT != MVT::i32 && VT != MVT::i64) || 11157 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) 11158 return SDValue(); 11159 11160 SDLoc DL(N); 11161 SDValue N0 = N->getOperand(0); 11162 11163 bool IsNegPow2 = (-Divisor).isPowerOf2(); 11164 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); 11165 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); 11166 11167 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); 11168 if (Created) 11169 Created->push_back(Op.getNode()); 11170 11171 if (IsNegPow2) { 11172 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); 11173 if (Created) 11174 Created->push_back(Op.getNode()); 11175 } 11176 11177 return Op; 11178 } 11179 11180 //===----------------------------------------------------------------------===// 11181 // Inline Assembly Support 11182 //===----------------------------------------------------------------------===// 11183 11184 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 11185 APInt &KnownZero, 11186 APInt &KnownOne, 11187 const SelectionDAG &DAG, 11188 unsigned Depth) const { 11189 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 11190 switch (Op.getOpcode()) { 11191 default: break; 11192 case PPCISD::LBRX: { 11193 // lhbrx is known to have the top bits cleared out. 
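// (A 16-bit byte-reversed load yields a 32-bit result whose upper 16 bits
// are zero, hence the 0xFFFF0000 known-zero mask set just below.)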
11194 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 11195 KnownZero = 0xFFFF0000; 11196 break; 11197 } 11198 case ISD::INTRINSIC_WO_CHAIN: { 11199 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 11200 default: break; 11201 case Intrinsic::ppc_altivec_vcmpbfp_p: 11202 case Intrinsic::ppc_altivec_vcmpeqfp_p: 11203 case Intrinsic::ppc_altivec_vcmpequb_p: 11204 case Intrinsic::ppc_altivec_vcmpequh_p: 11205 case Intrinsic::ppc_altivec_vcmpequw_p: 11206 case Intrinsic::ppc_altivec_vcmpequd_p: 11207 case Intrinsic::ppc_altivec_vcmpgefp_p: 11208 case Intrinsic::ppc_altivec_vcmpgtfp_p: 11209 case Intrinsic::ppc_altivec_vcmpgtsb_p: 11210 case Intrinsic::ppc_altivec_vcmpgtsh_p: 11211 case Intrinsic::ppc_altivec_vcmpgtsw_p: 11212 case Intrinsic::ppc_altivec_vcmpgtsd_p: 11213 case Intrinsic::ppc_altivec_vcmpgtub_p: 11214 case Intrinsic::ppc_altivec_vcmpgtuh_p: 11215 case Intrinsic::ppc_altivec_vcmpgtuw_p: 11216 case Intrinsic::ppc_altivec_vcmpgtud_p: 11217 KnownZero = ~1U; // All bits but the low one are known to be zero. 11218 break; 11219 } 11220 } 11221 } 11222 } 11223 11224 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { 11225 switch (Subtarget.getDarwinDirective()) { 11226 default: break; 11227 case PPC::DIR_970: 11228 case PPC::DIR_PWR4: 11229 case PPC::DIR_PWR5: 11230 case PPC::DIR_PWR5X: 11231 case PPC::DIR_PWR6: 11232 case PPC::DIR_PWR6X: 11233 case PPC::DIR_PWR7: 11234 case PPC::DIR_PWR8: 11235 case PPC::DIR_PWR9: { 11236 if (!ML) 11237 break; 11238 11239 const PPCInstrInfo *TII = Subtarget.getInstrInfo(); 11240 11241 // For small loops (between 5 and 8 instructions), align to a 32-byte 11242 // boundary so that the entire loop fits in one instruction-cache line. 11243 uint64_t LoopSize = 0; 11244 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) 11245 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { 11246 LoopSize += TII->GetInstSizeInBytes(*J); 11247 if (LoopSize > 32) 11248 break; 11249 } 11250 11251 if (LoopSize > 16 && LoopSize <= 32) 11252 return 5; 11253 11254 break; 11255 } 11256 } 11257 11258 return TargetLowering::getPrefLoopAlignment(ML); 11259 } 11260 11261 /// getConstraintType - Given a constraint, return the type of 11262 /// constraint it is for this target. 11263 PPCTargetLowering::ConstraintType 11264 PPCTargetLowering::getConstraintType(StringRef Constraint) const { 11265 if (Constraint.size() == 1) { 11266 switch (Constraint[0]) { 11267 default: break; 11268 case 'b': 11269 case 'r': 11270 case 'f': 11271 case 'd': 11272 case 'v': 11273 case 'y': 11274 return C_RegisterClass; 11275 case 'Z': 11276 // FIXME: While Z does indicate a memory constraint, it specifically 11277 // indicates an r+r address (used in conjunction with the 'y' modifier 11278 // in the replacement string). Currently, we're forcing the base 11279 // register to be r0 in the asm printer (which is interpreted as zero) 11280 // and forming the complete address in the second register. This is 11281 // suboptimal. 11282 return C_Memory; 11283 } 11284 } else if (Constraint == "wc") { // individual CR bits. 11285 return C_RegisterClass; 11286 } else if (Constraint == "wa" || Constraint == "wd" || 11287 Constraint == "wf" || Constraint == "ws") { 11288 return C_RegisterClass; // VSX registers. 11289 } 11290 return TargetLowering::getConstraintType(Constraint); 11291 } 11292 11293 /// Examine constraint type and operand type and determine a weight value. 
11294 /// This object must already have been set up with the operand type 11295 /// and the current alternative constraint selected. 11296 TargetLowering::ConstraintWeight 11297 PPCTargetLowering::getSingleConstraintMatchWeight( 11298 AsmOperandInfo &info, const char *constraint) const { 11299 ConstraintWeight weight = CW_Invalid; 11300 Value *CallOperandVal = info.CallOperandVal; 11301 // If we don't have a value, we can't do a match, 11302 // but allow it at the lowest weight. 11303 if (!CallOperandVal) 11304 return CW_Default; 11305 Type *type = CallOperandVal->getType(); 11306 11307 // Look at the constraint type. 11308 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 11309 return CW_Register; // an individual CR bit. 11310 else if ((StringRef(constraint) == "wa" || 11311 StringRef(constraint) == "wd" || 11312 StringRef(constraint) == "wf") && 11313 type->isVectorTy()) 11314 return CW_Register; 11315 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 11316 return CW_Register; 11317 11318 switch (*constraint) { 11319 default: 11320 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 11321 break; 11322 case 'b': 11323 if (type->isIntegerTy()) 11324 weight = CW_Register; 11325 break; 11326 case 'f': 11327 if (type->isFloatTy()) 11328 weight = CW_Register; 11329 break; 11330 case 'd': 11331 if (type->isDoubleTy()) 11332 weight = CW_Register; 11333 break; 11334 case 'v': 11335 if (type->isVectorTy()) 11336 weight = CW_Register; 11337 break; 11338 case 'y': 11339 weight = CW_Register; 11340 break; 11341 case 'Z': 11342 weight = CW_Memory; 11343 break; 11344 } 11345 return weight; 11346 } 11347 11348 std::pair<unsigned, const TargetRegisterClass *> 11349 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 11350 StringRef Constraint, 11351 MVT VT) const { 11352 if (Constraint.size() == 1) { 11353 // GCC RS6000 Constraint Letters 11354 switch (Constraint[0]) { 11355 case 'b': // R1-R31 11356 if (VT == MVT::i64 && Subtarget.isPPC64()) 11357 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 11358 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 11359 case 'r': // R0-R31 11360 if (VT == MVT::i64 && Subtarget.isPPC64()) 11361 return std::make_pair(0U, &PPC::G8RCRegClass); 11362 return std::make_pair(0U, &PPC::GPRCRegClass); 11363 // 'd' and 'f' constraints are both defined to be "the floating point 11364 // registers", where one is for 32-bit and the other for 64-bit. We don't 11365 // really care overly much here so just give them all the same reg classes. 11366 case 'd': 11367 case 'f': 11368 if (VT == MVT::f32 || VT == MVT::i32) 11369 return std::make_pair(0U, &PPC::F4RCRegClass); 11370 if (VT == MVT::f64 || VT == MVT::i64) 11371 return std::make_pair(0U, &PPC::F8RCRegClass); 11372 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 11373 return std::make_pair(0U, &PPC::QFRCRegClass); 11374 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 11375 return std::make_pair(0U, &PPC::QSRCRegClass); 11376 break; 11377 case 'v': 11378 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 11379 return std::make_pair(0U, &PPC::QFRCRegClass); 11380 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 11381 return std::make_pair(0U, &PPC::QSRCRegClass); 11382 if (Subtarget.hasAltivec()) 11383 return std::make_pair(0U, &PPC::VRRCRegClass); 11384 case 'y': // crrc 11385 return std::make_pair(0U, &PPC::CRRCRegClass); 11386 } 11387 } else if (Constraint == "wc" && Subtarget.useCRBits()) { 11388 // An individual CR bit. 
11389 return std::make_pair(0U, &PPC::CRBITRCRegClass); 11390 } else if ((Constraint == "wa" || Constraint == "wd" || 11391 Constraint == "wf") && Subtarget.hasVSX()) { 11392 return std::make_pair(0U, &PPC::VSRCRegClass); 11393 } else if (Constraint == "ws" && Subtarget.hasVSX()) { 11394 if (VT == MVT::f32 && Subtarget.hasP8Vector()) 11395 return std::make_pair(0U, &PPC::VSSRCRegClass); 11396 else 11397 return std::make_pair(0U, &PPC::VSFRCRegClass); 11398 } 11399 11400 std::pair<unsigned, const TargetRegisterClass *> R = 11401 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 11402 11403 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 11404 // (which we call X[0-9]+). If a 64-bit value has been requested, and a 11405 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 11406 // register. 11407 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 11408 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 11409 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && 11410 PPC::GPRCRegClass.contains(R.first)) 11411 return std::make_pair(TRI->getMatchingSuperReg(R.first, 11412 PPC::sub_32, &PPC::G8RCRegClass), 11413 &PPC::G8RCRegClass); 11414 11415 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 11416 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { 11417 R.first = PPC::CR0; 11418 R.second = &PPC::CRRCRegClass; 11419 } 11420 11421 return R; 11422 } 11423 11424 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 11425 /// vector. If it is invalid, don't add anything to Ops. 11426 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 11427 std::string &Constraint, 11428 std::vector<SDValue>&Ops, 11429 SelectionDAG &DAG) const { 11430 SDValue Result; 11431 11432 // Only support length 1 constraints. 11433 if (Constraint.length() > 1) return; 11434 11435 char Letter = Constraint[0]; 11436 switch (Letter) { 11437 default: break; 11438 case 'I': 11439 case 'J': 11440 case 'K': 11441 case 'L': 11442 case 'M': 11443 case 'N': 11444 case 'O': 11445 case 'P': { 11446 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 11447 if (!CST) return; // Must be an immediate to match. 11448 SDLoc dl(Op); 11449 int64_t Value = CST->getSExtValue(); 11450 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative 11451 // numbers are printed as such. 11452 switch (Letter) { 11453 default: llvm_unreachable("Unknown constraint letter!"); 11454 case 'I': // "I" is a signed 16-bit constant. 11455 if (isInt<16>(Value)) 11456 Result = DAG.getTargetConstant(Value, dl, TCVT); 11457 break; 11458 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 11459 if (isShiftedUInt<16, 16>(Value)) 11460 Result = DAG.getTargetConstant(Value, dl, TCVT); 11461 break; 11462 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 11463 if (isShiftedInt<16, 16>(Value)) 11464 Result = DAG.getTargetConstant(Value, dl, TCVT); 11465 break; 11466 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 11467 if (isUInt<16>(Value)) 11468 Result = DAG.getTargetConstant(Value, dl, TCVT); 11469 break; 11470 case 'M': // "M" is a constant that is greater than 31. 11471 if (Value > 31) 11472 Result = DAG.getTargetConstant(Value, dl, TCVT); 11473 break; 11474 case 'N': // "N" is a positive constant that is an exact power of two. 
11475 if (Value > 0 && isPowerOf2_64(Value)) 11476 Result = DAG.getTargetConstant(Value, dl, TCVT); 11477 break; 11478 case 'O': // "O" is the constant zero. 11479 if (Value == 0) 11480 Result = DAG.getTargetConstant(Value, dl, TCVT); 11481 break; 11482 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 11483 if (isInt<16>(-Value)) 11484 Result = DAG.getTargetConstant(Value, dl, TCVT); 11485 break; 11486 } 11487 break; 11488 } 11489 } 11490 11491 if (Result.getNode()) { 11492 Ops.push_back(Result); 11493 return; 11494 } 11495 11496 // Handle standard constraint letters. 11497 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 11498 } 11499 11500 // isLegalAddressingMode - Return true if the addressing mode represented 11501 // by AM is legal for this target, for a load/store of the specified type. 11502 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, 11503 const AddrMode &AM, Type *Ty, 11504 unsigned AS) const { 11505 // PPC does not allow r+i addressing modes for vectors! 11506 if (Ty->isVectorTy() && AM.BaseOffs != 0) 11507 return false; 11508 11509 // PPC allows a sign-extended 16-bit immediate field. 11510 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 11511 return false; 11512 11513 // No global is ever allowed as a base. 11514 if (AM.BaseGV) 11515 return false; 11516 11517 // PPC only support r+r, 11518 switch (AM.Scale) { 11519 case 0: // "r+i" or just "i", depending on HasBaseReg. 11520 break; 11521 case 1: 11522 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 11523 return false; 11524 // Otherwise we have r+r or r+i. 11525 break; 11526 case 2: 11527 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 11528 return false; 11529 // Allow 2*r as r+r. 11530 break; 11531 default: 11532 // No other scales are supported. 11533 return false; 11534 } 11535 11536 return true; 11537 } 11538 11539 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 11540 SelectionDAG &DAG) const { 11541 MachineFunction &MF = DAG.getMachineFunction(); 11542 MachineFrameInfo *MFI = MF.getFrameInfo(); 11543 MFI->setReturnAddressIsTaken(true); 11544 11545 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 11546 return SDValue(); 11547 11548 SDLoc dl(Op); 11549 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 11550 11551 // Make sure the function does not optimize away the store of the RA to 11552 // the stack. 11553 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 11554 FuncInfo->setLRStoreRequired(); 11555 bool isPPC64 = Subtarget.isPPC64(); 11556 auto PtrVT = getPointerTy(MF.getDataLayout()); 11557 11558 if (Depth > 0) { 11559 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 11560 SDValue Offset = 11561 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, 11562 isPPC64 ? MVT::i64 : MVT::i32); 11563 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 11564 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), 11565 MachinePointerInfo()); 11566 } 11567 11568 // Just load the return address off the stack. 
11569 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 11570 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, 11571 MachinePointerInfo()); 11572 } 11573 11574 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 11575 SelectionDAG &DAG) const { 11576 SDLoc dl(Op); 11577 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 11578 11579 MachineFunction &MF = DAG.getMachineFunction(); 11580 MachineFrameInfo *MFI = MF.getFrameInfo(); 11581 MFI->setFrameAddressIsTaken(true); 11582 11583 EVT PtrVT = getPointerTy(MF.getDataLayout()); 11584 bool isPPC64 = PtrVT == MVT::i64; 11585 11586 // Naked functions never have a frame pointer, and so we use r1. For all 11587 // other functions, this decision must be delayed until during PEI. 11588 unsigned FrameReg; 11589 if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) 11590 FrameReg = isPPC64 ? PPC::X1 : PPC::R1; 11591 else 11592 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; 11593 11594 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 11595 PtrVT); 11596 while (Depth--) 11597 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 11598 FrameAddr, MachinePointerInfo()); 11599 return FrameAddr; 11600 } 11601 11602 // FIXME? Maybe this could be a TableGen attribute on some registers and 11603 // this table could be generated automatically from RegInfo. 11604 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, 11605 SelectionDAG &DAG) const { 11606 bool isPPC64 = Subtarget.isPPC64(); 11607 bool isDarwinABI = Subtarget.isDarwinABI(); 11608 11609 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || 11610 (!isPPC64 && VT != MVT::i32)) 11611 report_fatal_error("Invalid register global variable type"); 11612 11613 bool is64Bit = isPPC64 && VT == MVT::i64; 11614 unsigned Reg = StringSwitch<unsigned>(RegName) 11615 .Case("r1", is64Bit ? PPC::X1 : PPC::R1) 11616 .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) 11617 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : 11618 (is64Bit ? PPC::X13 : PPC::R13)) 11619 .Default(0); 11620 11621 if (Reg) 11622 return Reg; 11623 report_fatal_error("Invalid register name global variable"); 11624 } 11625 11626 bool 11627 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 11628 // The PowerPC target isn't yet aware of offsets. 
11629 return false; 11630 } 11631 11632 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 11633 const CallInst &I, 11634 unsigned Intrinsic) const { 11635 11636 switch (Intrinsic) { 11637 case Intrinsic::ppc_qpx_qvlfd: 11638 case Intrinsic::ppc_qpx_qvlfs: 11639 case Intrinsic::ppc_qpx_qvlfcd: 11640 case Intrinsic::ppc_qpx_qvlfcs: 11641 case Intrinsic::ppc_qpx_qvlfiwa: 11642 case Intrinsic::ppc_qpx_qvlfiwz: 11643 case Intrinsic::ppc_altivec_lvx: 11644 case Intrinsic::ppc_altivec_lvxl: 11645 case Intrinsic::ppc_altivec_lvebx: 11646 case Intrinsic::ppc_altivec_lvehx: 11647 case Intrinsic::ppc_altivec_lvewx: 11648 case Intrinsic::ppc_vsx_lxvd2x: 11649 case Intrinsic::ppc_vsx_lxvw4x: { 11650 EVT VT; 11651 switch (Intrinsic) { 11652 case Intrinsic::ppc_altivec_lvebx: 11653 VT = MVT::i8; 11654 break; 11655 case Intrinsic::ppc_altivec_lvehx: 11656 VT = MVT::i16; 11657 break; 11658 case Intrinsic::ppc_altivec_lvewx: 11659 VT = MVT::i32; 11660 break; 11661 case Intrinsic::ppc_vsx_lxvd2x: 11662 VT = MVT::v2f64; 11663 break; 11664 case Intrinsic::ppc_qpx_qvlfd: 11665 VT = MVT::v4f64; 11666 break; 11667 case Intrinsic::ppc_qpx_qvlfs: 11668 VT = MVT::v4f32; 11669 break; 11670 case Intrinsic::ppc_qpx_qvlfcd: 11671 VT = MVT::v2f64; 11672 break; 11673 case Intrinsic::ppc_qpx_qvlfcs: 11674 VT = MVT::v2f32; 11675 break; 11676 default: 11677 VT = MVT::v4i32; 11678 break; 11679 } 11680 11681 Info.opc = ISD::INTRINSIC_W_CHAIN; 11682 Info.memVT = VT; 11683 Info.ptrVal = I.getArgOperand(0); 11684 Info.offset = -VT.getStoreSize()+1; 11685 Info.size = 2*VT.getStoreSize()-1; 11686 Info.align = 1; 11687 Info.vol = false; 11688 Info.readMem = true; 11689 Info.writeMem = false; 11690 return true; 11691 } 11692 case Intrinsic::ppc_qpx_qvlfda: 11693 case Intrinsic::ppc_qpx_qvlfsa: 11694 case Intrinsic::ppc_qpx_qvlfcda: 11695 case Intrinsic::ppc_qpx_qvlfcsa: 11696 case Intrinsic::ppc_qpx_qvlfiwaa: 11697 case Intrinsic::ppc_qpx_qvlfiwza: { 11698 EVT VT; 11699 switch (Intrinsic) { 11700 case Intrinsic::ppc_qpx_qvlfda: 11701 VT = MVT::v4f64; 11702 break; 11703 case Intrinsic::ppc_qpx_qvlfsa: 11704 VT = MVT::v4f32; 11705 break; 11706 case Intrinsic::ppc_qpx_qvlfcda: 11707 VT = MVT::v2f64; 11708 break; 11709 case Intrinsic::ppc_qpx_qvlfcsa: 11710 VT = MVT::v2f32; 11711 break; 11712 default: 11713 VT = MVT::v4i32; 11714 break; 11715 } 11716 11717 Info.opc = ISD::INTRINSIC_W_CHAIN; 11718 Info.memVT = VT; 11719 Info.ptrVal = I.getArgOperand(0); 11720 Info.offset = 0; 11721 Info.size = VT.getStoreSize(); 11722 Info.align = 1; 11723 Info.vol = false; 11724 Info.readMem = true; 11725 Info.writeMem = false; 11726 return true; 11727 } 11728 case Intrinsic::ppc_qpx_qvstfd: 11729 case Intrinsic::ppc_qpx_qvstfs: 11730 case Intrinsic::ppc_qpx_qvstfcd: 11731 case Intrinsic::ppc_qpx_qvstfcs: 11732 case Intrinsic::ppc_qpx_qvstfiw: 11733 case Intrinsic::ppc_altivec_stvx: 11734 case Intrinsic::ppc_altivec_stvxl: 11735 case Intrinsic::ppc_altivec_stvebx: 11736 case Intrinsic::ppc_altivec_stvehx: 11737 case Intrinsic::ppc_altivec_stvewx: 11738 case Intrinsic::ppc_vsx_stxvd2x: 11739 case Intrinsic::ppc_vsx_stxvw4x: { 11740 EVT VT; 11741 switch (Intrinsic) { 11742 case Intrinsic::ppc_altivec_stvebx: 11743 VT = MVT::i8; 11744 break; 11745 case Intrinsic::ppc_altivec_stvehx: 11746 VT = MVT::i16; 11747 break; 11748 case Intrinsic::ppc_altivec_stvewx: 11749 VT = MVT::i32; 11750 break; 11751 case Intrinsic::ppc_vsx_stxvd2x: 11752 VT = MVT::v2f64; 11753 break; 11754 case Intrinsic::ppc_qpx_qvstfd: 11755 VT = MVT::v4f64; 11756 break; 11757 
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    // As with the loads above, report a conservative extent for stores whose
    // underlying instructions ignore the low-order address bits.
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, it is safe to assume that the destination
/// alignment can satisfy any constraint. Similarly, if SrcAlign is zero,
/// there is no need to check it against an alignment requirement, probably
/// because the source does not need to be loaded. If 'IsMemset' is true, the
/// call is expanding a memset. If 'ZeroMemset' is true, it is a memset of
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so
/// it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
    const Function *F = MF.getFunction();
    // When expanding a memset, require at least two QPX instructions to cover
    // the cost of loading the value to be stored from the constant pool.
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
        !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
      return MVT::v4f64;
    }

    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Size >= 16 &&
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
      return MVT::v4i32;
  }

  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return !(BitSize == 0 || BitSize > 64);
}

bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

// Extending f32 to f64 is free on PowerPC: single-precision values are held
// in double-precision format in the floating-point registers.
bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return true;
}

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

// PowerPC provides fused multiply-add instructions for both f32 and f64, so
// an FMA is at least as fast as separate multiply and add operations.
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

unsigned PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}

unsigned PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  if (Subtarget.hasVSX() || Subtarget.hasQPX())
    return true;

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
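// Note: fast-isel on PPC is currently only used for the 64-bit ELF ABI;
// PPC::createFastISel returns null for other subtargets, in which case
// SelectionDAG instruction selection is used instead.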
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}

void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  if (Subtarget.isDarwinABI()) return;
  if (!Subtarget.isPPC64()) return;

  // Update IsSplitCSR in PPCFunctionInfo
  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
  PFI->setIsSplitCSR(true);
}

void PPCTargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (PPC::G8RCRegClass.contains(*I))
      RC = &PPC::G8RCRegClass;
    else if (PPC::F8RCRegClass.contains(*I))
      RC = &PPC::F8RCRegClass;
    else if (PPC::CRRCRegClass.contains(*I))
      RC = &PPC::CRRCRegClass;
    else if (PPC::VRRCRegClass.contains(*I))
      RC = &PPC::VRRCRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    unsigned NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction()->hasFnAttribute(
               Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}

// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::useLoadStackGuardNode();
  return true;
}

// Override to disable global variable loading on Linux.
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::insertSSPDeclarations(M);
}