1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the PPCISelLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPCISelLowering.h" 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPCCallingConv.h" 17 #include "PPCCCState.h" 18 #include "PPCMachineFunctionInfo.h" 19 #include "PPCPerfectShuffle.h" 20 #include "PPCTargetMachine.h" 21 #include "PPCTargetObjectFile.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/ADT/StringSwitch.h" 25 #include "llvm/ADT/Triple.h" 26 #include "llvm/CodeGen/CallingConvLower.h" 27 #include "llvm/CodeGen/MachineFrameInfo.h" 28 #include "llvm/CodeGen/MachineFunction.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineJumpTableInfo.h" 31 #include "llvm/CodeGen/MachineLoopInfo.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/CodeGen/SelectionDAG.h" 34 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 35 #include "llvm/IR/CallingConv.h" 36 #include "llvm/IR/Constants.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Function.h" 39 #include "llvm/IR/Intrinsics.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/Format.h" 43 #include "llvm/Support/MathExtras.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Target/TargetOptions.h" 46 #include <list> 47 48 using namespace llvm; 49 50 #define DEBUG_TYPE "ppc-lowering" 51 52 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", 53 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); 54 55 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", 56 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); 57 58 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", 59 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); 60 61 static cl::opt<bool> DisableSCO("disable-ppc-sco", 62 cl::desc("disable sibling call optimization on ppc"), cl::Hidden); 63 64 STATISTIC(NumTailCalls, "Number of tail calls"); 65 STATISTIC(NumSiblingCalls, "Number of sibling calls"); 66 67 // FIXME: Remove this once the bug has been fixed! 68 extern cl::opt<bool> ANDIGlueBug; 69 70 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, 71 const PPCSubtarget &STI) 72 : TargetLowering(TM), Subtarget(STI) { 73 // Use _setjmp/_longjmp instead of setjmp/longjmp. 74 setUseUnderscoreSetJmp(true); 75 setUseUnderscoreLongJmp(true); 76 77 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all 78 // arguments are at least 4/8 bytes aligned. 79 bool isPPC64 = Subtarget.isPPC64(); 80 setMinStackArgumentAlignment(isPPC64 ? 8:4); 81 82 // Set up the register classes. 
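  // (addRegisterClass effectively marks the given type as legal for the
  // SelectionDAG and records which physical register class values of that
  // type are assigned to.)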
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
    addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  }

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load / store of condition registers
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
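
  // Because SREM/UREM are Expand while the hardware divide instructions are
  // legal, the legalizer is expected to lower a remainder roughly as
  // X - (X / Y) * Y (divw/divd, mullw/mulld, subf) instead of a libcall.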

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
  setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
    setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
  } else {
    setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32 , Expand);
  setOperationAction(ISD::ROTR, MVT::i64 , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT,
MVT::f32, Expand); 249 setOperationAction(ISD::SELECT, MVT::f64, Expand); 250 } 251 252 // PowerPC wants to turn select_cc of FP into fsel when possible. 253 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 254 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 255 256 // PowerPC wants to optimize integer setcc a bit 257 if (!Subtarget.useCRBits()) 258 setOperationAction(ISD::SETCC, MVT::i32, Custom); 259 260 // PowerPC does not have BRCOND which requires SetCC 261 if (!Subtarget.useCRBits()) 262 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 263 264 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 265 266 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 267 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 268 269 // PowerPC does not have [U|S]INT_TO_FP 270 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); 271 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); 272 273 if (Subtarget.hasDirectMove() && isPPC64) { 274 setOperationAction(ISD::BITCAST, MVT::f32, Legal); 275 setOperationAction(ISD::BITCAST, MVT::i32, Legal); 276 setOperationAction(ISD::BITCAST, MVT::i64, Legal); 277 setOperationAction(ISD::BITCAST, MVT::f64, Legal); 278 } else { 279 setOperationAction(ISD::BITCAST, MVT::f32, Expand); 280 setOperationAction(ISD::BITCAST, MVT::i32, Expand); 281 setOperationAction(ISD::BITCAST, MVT::i64, Expand); 282 setOperationAction(ISD::BITCAST, MVT::f64, Expand); 283 } 284 285 // We cannot sextinreg(i1). Expand to shifts. 286 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 287 288 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support 289 // SjLj exception handling but a light-weight setjmp/longjmp replacement to 290 // support continuation, user-level threading, and etc.. As a result, no 291 // other SjLj exception interfaces are implemented and please don't build 292 // your own exception handling based on them. 293 // LLVM/Clang supports zero-cost DWARF exception handling. 294 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 295 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 296 297 // We want to legalize GlobalAddress and ConstantPool nodes into the 298 // appropriate instructions to materialize the address. 299 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 300 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 301 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 302 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 303 setOperationAction(ISD::JumpTable, MVT::i32, Custom); 304 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 305 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 306 setOperationAction(ISD::BlockAddress, MVT::i64, Custom); 307 setOperationAction(ISD::ConstantPool, MVT::i64, Custom); 308 setOperationAction(ISD::JumpTable, MVT::i64, Custom); 309 310 // TRAP is legal. 311 setOperationAction(ISD::TRAP, MVT::Other, Legal); 312 313 // TRAMPOLINE is custom lowered. 314 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); 315 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); 316 317 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 318 setOperationAction(ISD::VASTART , MVT::Other, Custom); 319 320 if (Subtarget.isSVR4ABI()) { 321 if (isPPC64) { 322 // VAARG always uses double-word chunks, so promote anything smaller. 
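        // (For example, an i8 va_arg is promoted to i64 here and read back
        // from a full 8-byte slot in the argument area.)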
323 setOperationAction(ISD::VAARG, MVT::i1, Promote); 324 AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); 325 setOperationAction(ISD::VAARG, MVT::i8, Promote); 326 AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); 327 setOperationAction(ISD::VAARG, MVT::i16, Promote); 328 AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); 329 setOperationAction(ISD::VAARG, MVT::i32, Promote); 330 AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); 331 setOperationAction(ISD::VAARG, MVT::Other, Expand); 332 } else { 333 // VAARG is custom lowered with the 32-bit SVR4 ABI. 334 setOperationAction(ISD::VAARG, MVT::Other, Custom); 335 setOperationAction(ISD::VAARG, MVT::i64, Custom); 336 } 337 } else 338 setOperationAction(ISD::VAARG, MVT::Other, Expand); 339 340 if (Subtarget.isSVR4ABI() && !isPPC64) 341 // VACOPY is custom lowered with the 32-bit SVR4 ABI. 342 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 343 else 344 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 345 346 // Use the default implementation. 347 setOperationAction(ISD::VAEND , MVT::Other, Expand); 348 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 349 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); 350 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 351 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); 352 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); 353 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); 354 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); 355 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); 356 357 // We want to custom lower some of our intrinsics. 358 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 359 360 // To handle counter-based loop conditions. 361 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); 362 363 // Comparisons that require checking two conditions. 364 setCondCodeAction(ISD::SETULT, MVT::f32, Expand); 365 setCondCodeAction(ISD::SETULT, MVT::f64, Expand); 366 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 367 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 368 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); 369 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); 370 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 371 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 372 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); 373 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); 374 setCondCodeAction(ISD::SETONE, MVT::f32, Expand); 375 setCondCodeAction(ISD::SETONE, MVT::f64, Expand); 376 377 if (Subtarget.has64BitSupport()) { 378 // They also have instructions for converting between i64 and fp. 379 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 380 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 381 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 382 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 383 // This is just the low 32 bits of a (signed) fp->i64 conversion. 384 // We cannot do this with Promote because i64 is not a legal type. 385 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 386 387 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) 388 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 389 } else { 390 // PowerPC does not have FP_TO_UINT on 32-bit implementations. 391 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 392 } 393 394 // With the instructions enabled under FPCVT, we can do everything. 
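  // (ISA 2.06's fcfids/fcfidus and fctiwuz/fctiduz instructions cover the
  // remaining integer <-> floating-point conversions, which is why they can
  // all be custom-lowered to real instruction sequences below.)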
395 if (Subtarget.hasFPCVT()) { 396 if (Subtarget.has64BitSupport()) { 397 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 398 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 399 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 400 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 401 } 402 403 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 404 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 405 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 406 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 407 } 408 409 if (Subtarget.use64BitRegs()) { 410 // 64-bit PowerPC implementations can support i64 types directly 411 addRegisterClass(MVT::i64, &PPC::G8RCRegClass); 412 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 413 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 414 // 64-bit PowerPC wants to expand i128 shifts itself. 415 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); 416 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); 417 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); 418 } else { 419 // 32-bit PowerPC wants to expand i64 shifts itself. 420 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 421 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 422 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 423 } 424 425 if (Subtarget.hasAltivec()) { 426 // First set operation action for all vector types to expand. Then we 427 // will selectively turn on ones that can be effectively codegen'd. 428 for (MVT VT : MVT::vector_valuetypes()) { 429 // add/sub are legal for all supported vector VT's. 430 setOperationAction(ISD::ADD, VT, Legal); 431 setOperationAction(ISD::SUB, VT, Legal); 432 433 // Vector instructions introduced in P8 434 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { 435 setOperationAction(ISD::CTPOP, VT, Legal); 436 setOperationAction(ISD::CTLZ, VT, Legal); 437 } 438 else { 439 setOperationAction(ISD::CTPOP, VT, Expand); 440 setOperationAction(ISD::CTLZ, VT, Expand); 441 } 442 443 // Vector instructions introduced in P9 444 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) 445 setOperationAction(ISD::CTTZ, VT, Legal); 446 else 447 setOperationAction(ISD::CTTZ, VT, Expand); 448 449 // We promote all shuffles to v16i8. 450 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); 451 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); 452 453 // We promote all non-typed operations to v4i32. 454 setOperationAction(ISD::AND , VT, Promote); 455 AddPromotedToType (ISD::AND , VT, MVT::v4i32); 456 setOperationAction(ISD::OR , VT, Promote); 457 AddPromotedToType (ISD::OR , VT, MVT::v4i32); 458 setOperationAction(ISD::XOR , VT, Promote); 459 AddPromotedToType (ISD::XOR , VT, MVT::v4i32); 460 setOperationAction(ISD::LOAD , VT, Promote); 461 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); 462 setOperationAction(ISD::SELECT, VT, Promote); 463 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); 464 setOperationAction(ISD::SELECT_CC, VT, Promote); 465 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); 466 setOperationAction(ISD::STORE, VT, Promote); 467 AddPromotedToType (ISD::STORE, VT, MVT::v4i32); 468 469 // No other operations are legal. 
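      // (Anything left as Expand here is only re-enabled further down, in the
      // Altivec/VSX-specific blocks of this constructor, where supported.)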
470 setOperationAction(ISD::MUL , VT, Expand); 471 setOperationAction(ISD::SDIV, VT, Expand); 472 setOperationAction(ISD::SREM, VT, Expand); 473 setOperationAction(ISD::UDIV, VT, Expand); 474 setOperationAction(ISD::UREM, VT, Expand); 475 setOperationAction(ISD::FDIV, VT, Expand); 476 setOperationAction(ISD::FREM, VT, Expand); 477 setOperationAction(ISD::FNEG, VT, Expand); 478 setOperationAction(ISD::FSQRT, VT, Expand); 479 setOperationAction(ISD::FLOG, VT, Expand); 480 setOperationAction(ISD::FLOG10, VT, Expand); 481 setOperationAction(ISD::FLOG2, VT, Expand); 482 setOperationAction(ISD::FEXP, VT, Expand); 483 setOperationAction(ISD::FEXP2, VT, Expand); 484 setOperationAction(ISD::FSIN, VT, Expand); 485 setOperationAction(ISD::FCOS, VT, Expand); 486 setOperationAction(ISD::FABS, VT, Expand); 487 setOperationAction(ISD::FPOWI, VT, Expand); 488 setOperationAction(ISD::FFLOOR, VT, Expand); 489 setOperationAction(ISD::FCEIL, VT, Expand); 490 setOperationAction(ISD::FTRUNC, VT, Expand); 491 setOperationAction(ISD::FRINT, VT, Expand); 492 setOperationAction(ISD::FNEARBYINT, VT, Expand); 493 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); 494 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); 495 setOperationAction(ISD::BUILD_VECTOR, VT, Expand); 496 setOperationAction(ISD::MULHU, VT, Expand); 497 setOperationAction(ISD::MULHS, VT, Expand); 498 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 499 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 500 setOperationAction(ISD::UDIVREM, VT, Expand); 501 setOperationAction(ISD::SDIVREM, VT, Expand); 502 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); 503 setOperationAction(ISD::FPOW, VT, Expand); 504 setOperationAction(ISD::BSWAP, VT, Expand); 505 setOperationAction(ISD::VSELECT, VT, Expand); 506 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 507 setOperationAction(ISD::ROTL, VT, Expand); 508 setOperationAction(ISD::ROTR, VT, Expand); 509 510 for (MVT InnerVT : MVT::vector_valuetypes()) { 511 setTruncStoreAction(VT, InnerVT, Expand); 512 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 513 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 514 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 515 } 516 } 517 518 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 519 // with merges, splats, etc. 520 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 521 522 setOperationAction(ISD::AND , MVT::v4i32, Legal); 523 setOperationAction(ISD::OR , MVT::v4i32, Legal); 524 setOperationAction(ISD::XOR , MVT::v4i32, Legal); 525 setOperationAction(ISD::LOAD , MVT::v4i32, Legal); 526 setOperationAction(ISD::SELECT, MVT::v4i32, 527 Subtarget.useCRBits() ? 
Legal : Expand); 528 setOperationAction(ISD::STORE , MVT::v4i32, Legal); 529 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 530 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 531 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 532 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 533 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 534 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 535 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 536 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 537 538 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); 539 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); 540 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); 541 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); 542 543 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 544 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 545 546 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { 547 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 548 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 549 } 550 551 if (Subtarget.hasP8Altivec()) 552 setOperationAction(ISD::MUL, MVT::v4i32, Legal); 553 else 554 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 555 556 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 557 setOperationAction(ISD::MUL, MVT::v16i8, Custom); 558 559 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 560 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 561 562 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 563 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 564 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 565 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 566 567 // Altivec does not contain unordered floating-point compare instructions 568 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); 569 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); 570 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); 571 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); 572 573 if (Subtarget.hasVSX()) { 574 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); 575 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 576 if (Subtarget.hasP8Vector()) { 577 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 578 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); 579 } 580 if (Subtarget.hasDirectMove() && isPPC64) { 581 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); 582 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); 583 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); 584 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); 585 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); 586 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); 587 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); 588 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); 589 } 590 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 591 592 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 593 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 594 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 595 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 596 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 597 598 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 599 600 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 601 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 602 603 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 604 
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 605 606 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); 607 setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); 608 setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); 609 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 610 setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); 611 612 // Share the Altivec comparison restrictions. 613 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); 614 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); 615 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); 616 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); 617 618 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 619 setOperationAction(ISD::STORE, MVT::v2f64, Legal); 620 621 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); 622 623 if (Subtarget.hasP8Vector()) 624 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); 625 626 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); 627 628 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); 629 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); 630 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); 631 632 if (Subtarget.hasP8Altivec()) { 633 setOperationAction(ISD::SHL, MVT::v2i64, Legal); 634 setOperationAction(ISD::SRA, MVT::v2i64, Legal); 635 setOperationAction(ISD::SRL, MVT::v2i64, Legal); 636 637 setOperationAction(ISD::SETCC, MVT::v2i64, Legal); 638 } 639 else { 640 setOperationAction(ISD::SHL, MVT::v2i64, Expand); 641 setOperationAction(ISD::SRA, MVT::v2i64, Expand); 642 setOperationAction(ISD::SRL, MVT::v2i64, Expand); 643 644 setOperationAction(ISD::SETCC, MVT::v2i64, Custom); 645 646 // VSX v2i64 only supports non-arithmetic operations. 647 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 648 setOperationAction(ISD::SUB, MVT::v2i64, Expand); 649 } 650 651 setOperationAction(ISD::LOAD, MVT::v2i64, Promote); 652 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); 653 setOperationAction(ISD::STORE, MVT::v2i64, Promote); 654 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); 655 656 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); 657 658 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 659 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 660 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 661 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 662 663 // Vector operation legalization checks the result type of 664 // SIGN_EXTEND_INREG, overall legalization checks the inner type. 
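      // (Hence v2i64 and the v2i32 inner form are marked Legal just below,
      // while the narrower v2i16/v2i8 inner forms require custom lowering.)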
665 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); 666 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); 667 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); 668 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); 669 670 setOperationAction(ISD::FNEG, MVT::v4f32, Legal); 671 setOperationAction(ISD::FNEG, MVT::v2f64, Legal); 672 setOperationAction(ISD::FABS, MVT::v4f32, Legal); 673 setOperationAction(ISD::FABS, MVT::v2f64, Legal); 674 675 if (Subtarget.hasDirectMove()) 676 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 677 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 678 679 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); 680 } 681 682 if (Subtarget.hasP8Altivec()) { 683 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); 684 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); 685 } 686 687 if (Subtarget.hasP9Vector()) { 688 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 689 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 690 } 691 } 692 693 if (Subtarget.hasQPX()) { 694 setOperationAction(ISD::FADD, MVT::v4f64, Legal); 695 setOperationAction(ISD::FSUB, MVT::v4f64, Legal); 696 setOperationAction(ISD::FMUL, MVT::v4f64, Legal); 697 setOperationAction(ISD::FREM, MVT::v4f64, Expand); 698 699 setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); 700 setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); 701 702 setOperationAction(ISD::LOAD , MVT::v4f64, Custom); 703 setOperationAction(ISD::STORE , MVT::v4f64, Custom); 704 705 setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); 706 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); 707 708 if (!Subtarget.useCRBits()) 709 setOperationAction(ISD::SELECT, MVT::v4f64, Expand); 710 setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); 711 712 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); 713 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); 714 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); 715 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); 716 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); 717 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); 718 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); 719 720 setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); 721 setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); 722 723 setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); 724 setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); 725 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); 726 727 setOperationAction(ISD::FNEG , MVT::v4f64, Legal); 728 setOperationAction(ISD::FABS , MVT::v4f64, Legal); 729 setOperationAction(ISD::FSIN , MVT::v4f64, Expand); 730 setOperationAction(ISD::FCOS , MVT::v4f64, Expand); 731 setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); 732 setOperationAction(ISD::FPOW , MVT::v4f64, Expand); 733 setOperationAction(ISD::FLOG , MVT::v4f64, Expand); 734 setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); 735 setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); 736 setOperationAction(ISD::FEXP , MVT::v4f64, Expand); 737 setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); 738 739 setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); 740 setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); 741 742 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); 743 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); 744 745 addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); 746 747 
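    // The same set of actions is repeated below for the QPX single-precision
    // type v4f32 (QSRC register class) and the v4i1 mask type (QBRC).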
setOperationAction(ISD::FADD, MVT::v4f32, Legal); 748 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 749 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 750 setOperationAction(ISD::FREM, MVT::v4f32, Expand); 751 752 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); 753 setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); 754 755 setOperationAction(ISD::LOAD , MVT::v4f32, Custom); 756 setOperationAction(ISD::STORE , MVT::v4f32, Custom); 757 758 if (!Subtarget.useCRBits()) 759 setOperationAction(ISD::SELECT, MVT::v4f32, Expand); 760 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 761 762 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); 763 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); 764 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); 765 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); 766 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); 767 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 768 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 769 770 setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); 771 setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); 772 773 setOperationAction(ISD::FNEG , MVT::v4f32, Legal); 774 setOperationAction(ISD::FABS , MVT::v4f32, Legal); 775 setOperationAction(ISD::FSIN , MVT::v4f32, Expand); 776 setOperationAction(ISD::FCOS , MVT::v4f32, Expand); 777 setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); 778 setOperationAction(ISD::FPOW , MVT::v4f32, Expand); 779 setOperationAction(ISD::FLOG , MVT::v4f32, Expand); 780 setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); 781 setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); 782 setOperationAction(ISD::FEXP , MVT::v4f32, Expand); 783 setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); 784 785 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); 786 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); 787 788 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); 789 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); 790 791 addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); 792 793 setOperationAction(ISD::AND , MVT::v4i1, Legal); 794 setOperationAction(ISD::OR , MVT::v4i1, Legal); 795 setOperationAction(ISD::XOR , MVT::v4i1, Legal); 796 797 if (!Subtarget.useCRBits()) 798 setOperationAction(ISD::SELECT, MVT::v4i1, Expand); 799 setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); 800 801 setOperationAction(ISD::LOAD , MVT::v4i1, Custom); 802 setOperationAction(ISD::STORE , MVT::v4i1, Custom); 803 804 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); 805 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); 806 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); 807 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); 808 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); 809 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); 810 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); 811 812 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); 813 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); 814 815 addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); 816 817 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); 818 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); 819 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); 820 setOperationAction(ISD::FROUND, MVT::v4f64, Legal); 821 822 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 823 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 824 
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 825 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 826 827 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); 828 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); 829 830 // These need to set FE_INEXACT, and so cannot be vectorized here. 831 setOperationAction(ISD::FRINT, MVT::v4f64, Expand); 832 setOperationAction(ISD::FRINT, MVT::v4f32, Expand); 833 834 if (TM.Options.UnsafeFPMath) { 835 setOperationAction(ISD::FDIV, MVT::v4f64, Legal); 836 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); 837 838 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 839 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 840 } else { 841 setOperationAction(ISD::FDIV, MVT::v4f64, Expand); 842 setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); 843 844 setOperationAction(ISD::FDIV, MVT::v4f32, Expand); 845 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); 846 } 847 } 848 849 if (Subtarget.has64BitSupport()) 850 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 851 852 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); 853 854 if (!isPPC64) { 855 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); 856 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); 857 } 858 859 setBooleanContents(ZeroOrOneBooleanContent); 860 861 if (Subtarget.hasAltivec()) { 862 // Altivec instructions set fields to all zeros or all ones. 863 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 864 } 865 866 if (!isPPC64) { 867 // These libcalls are not available in 32-bit. 868 setLibcallName(RTLIB::SHL_I128, nullptr); 869 setLibcallName(RTLIB::SRL_I128, nullptr); 870 setLibcallName(RTLIB::SRA_I128, nullptr); 871 } 872 873 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); 874 875 // We have target-specific dag combine patterns for the following nodes: 876 setTargetDAGCombine(ISD::SINT_TO_FP); 877 setTargetDAGCombine(ISD::BUILD_VECTOR); 878 if (Subtarget.hasFPCVT()) 879 setTargetDAGCombine(ISD::UINT_TO_FP); 880 setTargetDAGCombine(ISD::LOAD); 881 setTargetDAGCombine(ISD::STORE); 882 setTargetDAGCombine(ISD::BR_CC); 883 if (Subtarget.useCRBits()) 884 setTargetDAGCombine(ISD::BRCOND); 885 setTargetDAGCombine(ISD::BSWAP); 886 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 887 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 888 setTargetDAGCombine(ISD::INTRINSIC_VOID); 889 890 setTargetDAGCombine(ISD::SIGN_EXTEND); 891 setTargetDAGCombine(ISD::ZERO_EXTEND); 892 setTargetDAGCombine(ISD::ANY_EXTEND); 893 894 if (Subtarget.useCRBits()) { 895 setTargetDAGCombine(ISD::TRUNCATE); 896 setTargetDAGCombine(ISD::SETCC); 897 setTargetDAGCombine(ISD::SELECT_CC); 898 } 899 900 // Use reciprocal estimates. 901 if (TM.Options.UnsafeFPMath) { 902 setTargetDAGCombine(ISD::FDIV); 903 setTargetDAGCombine(ISD::FSQRT); 904 } 905 906 // Darwin long double math library functions have $LDBL128 appended. 
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
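/// For example, with this implementation a by-value aggregate that contains a
/// 128-bit Altivec vector is placed on a 16-byte boundary, while on Darwin
/// every by-value argument stays on a 4-byte boundary.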
1007 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, 1008 const DataLayout &DL) const { 1009 // Darwin passes everything on 4 byte boundary. 1010 if (Subtarget.isDarwin()) 1011 return 4; 1012 1013 // 16byte and wider vectors are passed on 16byte boundary. 1014 // The rest is 8 on PPC64 and 4 on PPC32 boundary. 1015 unsigned Align = Subtarget.isPPC64() ? 8 : 4; 1016 if (Subtarget.hasAltivec() || Subtarget.hasQPX()) 1017 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16); 1018 return Align; 1019 } 1020 1021 bool PPCTargetLowering::useSoftFloat() const { 1022 return Subtarget.useSoftFloat(); 1023 } 1024 1025 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 1026 switch ((PPCISD::NodeType)Opcode) { 1027 case PPCISD::FIRST_NUMBER: break; 1028 case PPCISD::FSEL: return "PPCISD::FSEL"; 1029 case PPCISD::FCFID: return "PPCISD::FCFID"; 1030 case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; 1031 case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; 1032 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; 1033 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 1034 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 1035 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; 1036 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; 1037 case PPCISD::FRE: return "PPCISD::FRE"; 1038 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; 1039 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 1040 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 1041 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 1042 case PPCISD::VPERM: return "PPCISD::VPERM"; 1043 case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; 1044 case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; 1045 case PPCISD::VECSHL: return "PPCISD::VECSHL"; 1046 case PPCISD::CMPB: return "PPCISD::CMPB"; 1047 case PPCISD::Hi: return "PPCISD::Hi"; 1048 case PPCISD::Lo: return "PPCISD::Lo"; 1049 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; 1050 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; 1051 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; 1052 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 1053 case PPCISD::SRL: return "PPCISD::SRL"; 1054 case PPCISD::SRA: return "PPCISD::SRA"; 1055 case PPCISD::SHL: return "PPCISD::SHL"; 1056 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; 1057 case PPCISD::CALL: return "PPCISD::CALL"; 1058 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; 1059 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 1060 case PPCISD::BCTRL: return "PPCISD::BCTRL"; 1061 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; 1062 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 1063 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; 1064 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; 1065 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; 1066 case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; 1067 case PPCISD::MFVSR: return "PPCISD::MFVSR"; 1068 case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; 1069 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; 1070 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; 1071 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; 1072 case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; 1073 case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; 1074 case PPCISD::VCMP: return "PPCISD::VCMP"; 1075 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 1076 case PPCISD::LBRX: return "PPCISD::LBRX"; 1077 case PPCISD::STBRX: return "PPCISD::STBRX"; 1078 case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; 1079 case PPCISD::LFIWZX: return 
"PPCISD::LFIWZX"; 1080 case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; 1081 case PPCISD::STXSIX: return "PPCISD::STXSIX"; 1082 case PPCISD::VEXTS: return "PPCISD::VEXTS"; 1083 case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; 1084 case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; 1085 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; 1086 case PPCISD::BDNZ: return "PPCISD::BDNZ"; 1087 case PPCISD::BDZ: return "PPCISD::BDZ"; 1088 case PPCISD::MFFS: return "PPCISD::MFFS"; 1089 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; 1090 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; 1091 case PPCISD::CR6SET: return "PPCISD::CR6SET"; 1092 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; 1093 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; 1094 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; 1095 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; 1096 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; 1097 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; 1098 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; 1099 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; 1100 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; 1101 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; 1102 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; 1103 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; 1104 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; 1105 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; 1106 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; 1107 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 1108 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; 1109 case PPCISD::SC: return "PPCISD::SC"; 1110 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; 1111 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; 1112 case PPCISD::RFEBB: return "PPCISD::RFEBB"; 1113 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; 1114 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; 1115 case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; 1116 case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; 1117 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; 1118 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; 1119 case PPCISD::QBFLT: return "PPCISD::QBFLT"; 1120 case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; 1121 } 1122 return nullptr; 1123 } 1124 1125 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, 1126 EVT VT) const { 1127 if (!VT.isVector()) 1128 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; 1129 1130 if (Subtarget.hasQPX()) 1131 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); 1132 1133 return VT.changeVectorElementTypeToInteger(); 1134 } 1135 1136 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { 1137 assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); 1138 return true; 1139 } 1140 1141 //===----------------------------------------------------------------------===// 1142 // Node matching predicates, for use by the tblgen matching code. 1143 //===----------------------------------------------------------------------===// 1144 1145 /// isFloatingPointZero - Return true if this is 0.0 or -0.0. 1146 static bool isFloatingPointZero(SDValue Op) { 1147 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 1148 return CFP->getValueAPF().isZero(); 1149 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 1150 // Maybe this has already been legalized into the constant pool? 
1151 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 1152 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 1153 return CFP->getValueAPF().isZero(); 1154 } 1155 return false; 1156 } 1157 1158 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 1159 /// true if Op is undef or if it matches the specified value. 1160 static bool isConstantOrUndef(int Op, int Val) { 1161 return Op < 0 || Op == Val; 1162 } 1163 1164 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 1165 /// VPKUHUM instruction. 1166 /// The ShuffleKind distinguishes between big-endian operations with 1167 /// two different inputs (0), either-endian operations with two identical 1168 /// inputs (1), and little-endian operations with two different inputs (2). 1169 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1170 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1171 SelectionDAG &DAG) { 1172 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1173 if (ShuffleKind == 0) { 1174 if (IsLE) 1175 return false; 1176 for (unsigned i = 0; i != 16; ++i) 1177 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 1178 return false; 1179 } else if (ShuffleKind == 2) { 1180 if (!IsLE) 1181 return false; 1182 for (unsigned i = 0; i != 16; ++i) 1183 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) 1184 return false; 1185 } else if (ShuffleKind == 1) { 1186 unsigned j = IsLE ? 0 : 1; 1187 for (unsigned i = 0; i != 8; ++i) 1188 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || 1189 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) 1190 return false; 1191 } 1192 return true; 1193 } 1194 1195 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 1196 /// VPKUWUM instruction. 1197 /// The ShuffleKind distinguishes between big-endian operations with 1198 /// two different inputs (0), either-endian operations with two identical 1199 /// inputs (1), and little-endian operations with two different inputs (2). 1200 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1201 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1202 SelectionDAG &DAG) { 1203 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1204 if (ShuffleKind == 0) { 1205 if (IsLE) 1206 return false; 1207 for (unsigned i = 0; i != 16; i += 2) 1208 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 1209 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) 1210 return false; 1211 } else if (ShuffleKind == 2) { 1212 if (!IsLE) 1213 return false; 1214 for (unsigned i = 0; i != 16; i += 2) 1215 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1216 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) 1217 return false; 1218 } else if (ShuffleKind == 1) { 1219 unsigned j = IsLE ? 0 : 2; 1220 for (unsigned i = 0; i != 8; i += 2) 1221 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1222 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1223 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1224 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) 1225 return false; 1226 } 1227 return true; 1228 } 1229 1230 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a 1231 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the 1232 /// current subtarget. 
1233 /// 1234 /// The ShuffleKind distinguishes between big-endian operations with 1235 /// two different inputs (0), either-endian operations with two identical 1236 /// inputs (1), and little-endian operations with two different inputs (2). 1237 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 1238 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 1239 SelectionDAG &DAG) { 1240 const PPCSubtarget& Subtarget = 1241 static_cast<const PPCSubtarget&>(DAG.getSubtarget()); 1242 if (!Subtarget.hasP8Vector()) 1243 return false; 1244 1245 bool IsLE = DAG.getDataLayout().isLittleEndian(); 1246 if (ShuffleKind == 0) { 1247 if (IsLE) 1248 return false; 1249 for (unsigned i = 0; i != 16; i += 4) 1250 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || 1251 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || 1252 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || 1253 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) 1254 return false; 1255 } else if (ShuffleKind == 2) { 1256 if (!IsLE) 1257 return false; 1258 for (unsigned i = 0; i != 16; i += 4) 1259 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 1260 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || 1261 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || 1262 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) 1263 return false; 1264 } else if (ShuffleKind == 1) { 1265 unsigned j = IsLE ? 0 : 4; 1266 for (unsigned i = 0; i != 8; i += 4) 1267 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 1268 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 1269 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || 1270 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || 1271 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 1272 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || 1273 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || 1274 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) 1275 return false; 1276 } 1277 return true; 1278 } 1279 1280 /// isVMerge - Common function, used to match vmrg* shuffles. 1281 /// 1282 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, 1283 unsigned LHSStart, unsigned RHSStart) { 1284 if (N->getValueType(0) != MVT::v16i8) 1285 return false; 1286 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 1287 "Unsupported merge size!"); 1288 1289 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 1290 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 1291 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), 1292 LHSStart+j+i*UnitSize) || 1293 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), 1294 RHSStart+j+i*UnitSize)) 1295 return false; 1296 } 1297 return true; 1298 } 1299 1300 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 1301 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). 1302 /// The ShuffleKind distinguishes between big-endian merges with two 1303 /// different inputs (0), either-endian merges with two identical inputs (1), 1304 /// and little-endian merges with two different inputs (2). For the latter, 1305 /// the input operands are swapped (see PPCInstrAltivec.td). 
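/// For example, on a big-endian target a word-sized (UnitSize == 4) VMRGL
/// merge of two different inputs corresponds to the byte shuffle mask
/// <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>.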
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * \brief Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, thus each vector will contain 16 elements
 * of size 8 bits. More information on the shufflevector instruction can be
 * found in the LLVM Language Reference:
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of
 *     the indices will be 0 to 15. In this case, the RHSStart value passed
 *     should be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31.
In this case, the RHSStart value passed should
1383 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1384 * to 31 specify elements in the second vector).
1385 *
1386 * \param[in] N The shuffle vector SD Node to analyze
1387 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1388 * \param[in] RHSStartValue Specifies the starting index for the right-hand input
1389 * vector to the shuffle_vector instruction
1390 * \return true iff this shuffle vector represents an even or odd word merge
1391 */
1392 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1393 unsigned RHSStartValue) {
1394 if (N->getValueType(0) != MVT::v16i8)
1395 return false;
1396
1397 for (unsigned i = 0; i < 2; ++i)
1398 for (unsigned j = 0; j < 4; ++j)
1399 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1400 i*RHSStartValue+j+IndexOffset) ||
1401 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1402 i*RHSStartValue+j+IndexOffset+8))
1403 return false;
1404 return true;
1405 }
1406
1407 /**
1408 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
1409 * vmrgow instructions.
1410 *
1411 * \param[in] N The shuffle vector SD Node to analyze
1412 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1413 * \param[in] ShuffleKind Identify the type of merge:
1414 * - 0 = big-endian merge with two different inputs;
1415 * - 1 = either-endian merge with two identical inputs;
1416 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1417 * little-endian merges).
1418 * \param[in] DAG The current SelectionDAG
1419 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
1420 */
1421 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1422 unsigned ShuffleKind, SelectionDAG &DAG) {
1423 if (DAG.getDataLayout().isLittleEndian()) {
1424 unsigned indexOffset = CheckEven ? 4 : 0;
1425 if (ShuffleKind == 1) // Unary
1426 return isVMerge(N, indexOffset, 0);
1427 else if (ShuffleKind == 2) // swapped
1428 return isVMerge(N, indexOffset, 16);
1429 else
1430 return false;
1431 }
1432 else {
1433 unsigned indexOffset = CheckEven ? 0 : 4;
1434 if (ShuffleKind == 1) // Unary
1435 return isVMerge(N, indexOffset, 0);
1436 else if (ShuffleKind == 0) // Normal
1437 return isVMerge(N, indexOffset, 16);
1438 else
1439 return false;
1440 }
1441 return false;
1442 }
1443
1444 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1445 /// amount, otherwise return -1.
1446 /// The ShuffleKind distinguishes between big-endian operations with two
1447 /// different inputs (0), either-endian operations with two identical inputs
1448 /// (1), and little-endian operations with two different inputs (2). For the
1449 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1450 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1451 SelectionDAG &DAG) {
1452 if (N->getValueType(0) != MVT::v16i8)
1453 return -1;
1454
1455 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1456
1457 // Find the first non-undef value in the shuffle mask.
1458 unsigned i;
1459 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1460 /*search*/;
1461
1462 if (i == 16) return -1; // all undef.
1463
1464 // Otherwise, check to see if the rest of the elements are consecutively
1465 // numbered from this value.
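  // For example (a sketch assuming big-endian and ShuffleKind 0): the mask
  // <3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18> starts at 3 and stays
  // consecutive, so it is accepted as a vsldoi with a shift amount of 3.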
1466 unsigned ShiftAmt = SVOp->getMaskElt(i); 1467 if (ShiftAmt < i) return -1; 1468 1469 ShiftAmt -= i; 1470 bool isLE = DAG.getDataLayout().isLittleEndian(); 1471 1472 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { 1473 // Check the rest of the elements to see if they are consecutive. 1474 for (++i; i != 16; ++i) 1475 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1476 return -1; 1477 } else if (ShuffleKind == 1) { 1478 // Check the rest of the elements to see if they are consecutive. 1479 for (++i; i != 16; ++i) 1480 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 1481 return -1; 1482 } else 1483 return -1; 1484 1485 if (isLE) 1486 ShiftAmt = 16 - ShiftAmt; 1487 1488 return ShiftAmt; 1489 } 1490 1491 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 1492 /// specifies a splat of a single element that is suitable for input to 1493 /// VSPLTB/VSPLTH/VSPLTW. 1494 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 1495 assert(N->getValueType(0) == MVT::v16i8 && 1496 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 1497 1498 // The consecutive indices need to specify an element, not part of two 1499 // different elements. So abandon ship early if this isn't the case. 1500 if (N->getMaskElt(0) % EltSize != 0) 1501 return false; 1502 1503 // This is a splat operation if each element of the permute is the same, and 1504 // if the value doesn't reference the second vector. 1505 unsigned ElementBase = N->getMaskElt(0); 1506 1507 // FIXME: Handle UNDEF elements too! 1508 if (ElementBase >= 16) 1509 return false; 1510 1511 // Check that the indices are consecutive, in the case of a multi-byte element 1512 // splatted with a v16i8 mask. 1513 for (unsigned i = 1; i != EltSize; ++i) 1514 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 1515 return false; 1516 1517 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 1518 if (N->getMaskElt(i) < 0) continue; 1519 for (unsigned j = 0; j != EltSize; ++j) 1520 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 1521 return false; 1522 } 1523 return true; 1524 } 1525 1526 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, 1527 unsigned &InsertAtByte, bool &Swap, bool IsLE) { 1528 1529 // Check that the mask is shuffling words 1530 for (unsigned i = 0; i < 4; ++i) { 1531 unsigned B0 = N->getMaskElt(i*4); 1532 unsigned B1 = N->getMaskElt(i*4+1); 1533 unsigned B2 = N->getMaskElt(i*4+2); 1534 unsigned B3 = N->getMaskElt(i*4+3); 1535 if (B0 % 4) 1536 return false; 1537 if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) 1538 return false; 1539 } 1540 1541 // Now we look at mask elements 0,4,8,12 1542 unsigned M0 = N->getMaskElt(0) / 4; 1543 unsigned M1 = N->getMaskElt(4) / 4; 1544 unsigned M2 = N->getMaskElt(8) / 4; 1545 unsigned M3 = N->getMaskElt(12) / 4; 1546 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; 1547 unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; 1548 1549 // Below, let H and L be arbitrary elements of the shuffle mask 1550 // where H is in the range [4,7] and L is in the range [0,3]. 1551 // H, 1, 2, 3 or L, 5, 6, 7 1552 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || 1553 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { 1554 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; 1555 InsertAtByte = IsLE ? 
12 : 0;
1556 Swap = M0 < 4;
1557 return true;
1558 }
1559 // 0, H, 2, 3 or 4, L, 6, 7
1560 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1561 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1562 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1563 InsertAtByte = IsLE ? 8 : 4;
1564 Swap = M1 < 4;
1565 return true;
1566 }
1567 // 0, 1, H, 3 or 4, 5, L, 7
1568 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1569 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1570 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1571 InsertAtByte = IsLE ? 4 : 8;
1572 Swap = M2 < 4;
1573 return true;
1574 }
1575 // 0, 1, 2, H or 4, 5, 6, L
1576 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1577 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1578 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1579 InsertAtByte = IsLE ? 0 : 12;
1580 Swap = M3 < 4;
1581 return true;
1582 }
1583
1584 // If both vector operands for the shuffle are the same vector, the mask will
1585 // contain only elements from the first one and the second one will be undef.
1586 if (N->getOperand(1).isUndef()) {
1587 ShiftElts = 0;
1588 Swap = true;
1589 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1590 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1591 InsertAtByte = IsLE ? 12 : 0;
1592 return true;
1593 }
1594 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1595 InsertAtByte = IsLE ? 8 : 4;
1596 return true;
1597 }
1598 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1599 InsertAtByte = IsLE ? 4 : 8;
1600 return true;
1601 }
1602 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1603 InsertAtByte = IsLE ? 0 : 12;
1604 return true;
1605 }
1606 }
1607
1608 return false;
1609 }
1610
1611 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1612 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1613 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1614 SelectionDAG &DAG) {
1615 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1616 assert(isSplatShuffleMask(SVOp, EltSize));
1617 if (DAG.getDataLayout().isLittleEndian())
1618 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1619 else
1620 return SVOp->getMaskElt(0) / EltSize;
1621 }
1622
1623 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1624 /// by using a vspltis[bhw] instruction of the specified element size, return
1625 /// the constant being splatted. The ByteSize field indicates the number of
1626 /// bytes of each element [124] -> [bhw].
1627 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1628 SDValue OpVal(nullptr, 0);
1629
1630 // If ByteSize of the splat is bigger than the element size of the
1631 // build_vector, then we have a case where we are checking for a splat where
1632 // multiple elements of the buildvector are folded together into a single
1633 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
1634 unsigned EltSize = 16/N->getNumOperands();
1635 if (EltSize < ByteSize) {
1636 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
1637 SDValue UniquedVals[4];
1638 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1639
1640 // See if all of the elements in the buildvector agree across each chunk.
1641 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1642 if (N->getOperand(i).isUndef()) continue;
1643 // If the element isn't a constant, bail fully out.
1644 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 1645 1646 1647 if (!UniquedVals[i&(Multiple-1)].getNode()) 1648 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 1649 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 1650 return SDValue(); // no match. 1651 } 1652 1653 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 1654 // either constant or undef values that are identical for each chunk. See 1655 // if these chunks can form into a larger vspltis*. 1656 1657 // Check to see if all of the leading entries are either 0 or -1. If 1658 // neither, then this won't fit into the immediate field. 1659 bool LeadingZero = true; 1660 bool LeadingOnes = true; 1661 for (unsigned i = 0; i != Multiple-1; ++i) { 1662 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 1663 1664 LeadingZero &= isNullConstant(UniquedVals[i]); 1665 LeadingOnes &= isAllOnesConstant(UniquedVals[i]); 1666 } 1667 // Finally, check the least significant entry. 1668 if (LeadingZero) { 1669 if (!UniquedVals[Multiple-1].getNode()) 1670 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef 1671 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 1672 if (Val < 16) // 0,0,0,4 -> vspltisw(4) 1673 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1674 } 1675 if (LeadingOnes) { 1676 if (!UniquedVals[Multiple-1].getNode()) 1677 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef 1678 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 1679 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 1680 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); 1681 } 1682 1683 return SDValue(); 1684 } 1685 1686 // Check to see if this buildvec has a single non-undef value in its elements. 1687 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1688 if (N->getOperand(i).isUndef()) continue; 1689 if (!OpVal.getNode()) 1690 OpVal = N->getOperand(i); 1691 else if (OpVal != N->getOperand(i)) 1692 return SDValue(); 1693 } 1694 1695 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 1696 1697 unsigned ValSizeInBytes = EltSize; 1698 uint64_t Value = 0; 1699 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1700 Value = CN->getZExtValue(); 1701 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1702 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 1703 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 1704 } 1705 1706 // If the splat value is larger than the element value, then we can never do 1707 // this splat. The only case that we could fit the replicated bits into our 1708 // immediate field for would be zero, and we prefer to use vxor for it. 1709 if (ValSizeInBytes < ByteSize) return SDValue(); 1710 1711 // If the element value is larger than the splat value, check if it consists 1712 // of a repeated bit pattern of size ByteSize. 1713 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) 1714 return SDValue(); 1715 1716 // Properly sign extend the value. 1717 int MaskVal = SignExtend32(Value, ByteSize * 8); 1718 1719 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 
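  // (Illustrative example, not an exhaustive spec: a v8i16 build_vector that
  // splats the constant 5 reaches this point with MaskVal == 5 when
  // ByteSize == 2 and is returned below as the immediate for vspltish 5; an
  // all-zeros vector is instead left for the vxor path.)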
1720 if (MaskVal == 0) return SDValue();
1721
1722 // Finally, if this value fits in a 5 bit sext field, return it
1723 if (SignExtend32<5>(MaskVal) == MaskVal)
1724 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
1725 return SDValue();
1726 }
1727
1728 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1729 /// amount, otherwise return -1.
1730 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
1731 EVT VT = N->getValueType(0);
1732 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
1733 return -1;
1734
1735 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1736
1737 // Find the first non-undef value in the shuffle mask.
1738 unsigned i;
1739 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
1740 /*search*/;
1741
1742 if (i == 4) return -1; // all undef.
1743
1744 // Otherwise, check to see if the rest of the elements are consecutively
1745 // numbered from this value.
1746 unsigned ShiftAmt = SVOp->getMaskElt(i);
1747 if (ShiftAmt < i) return -1;
1748 ShiftAmt -= i;
1749
1750 // Check the rest of the elements to see if they are consecutive.
1751 for (++i; i != 4; ++i)
1752 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1753 return -1;
1754
1755 return ShiftAmt;
1756 }
1757
1758 //===----------------------------------------------------------------------===//
1759 // Addressing Mode Selection
1760 //===----------------------------------------------------------------------===//
1761
1762 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
1763 /// or 64-bit immediate, and if the value can be accurately represented as a
1764 /// sign extension from a 16-bit value. If so, this returns true and the
1765 /// immediate.
1766 static bool isIntS16Immediate(SDNode *N, short &Imm) {
1767 if (!isa<ConstantSDNode>(N))
1768 return false;
1769
1770 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
1771 if (N->getValueType(0) == MVT::i32)
1772 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
1773 else
1774 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
1775 }
1776 static bool isIntS16Immediate(SDValue Op, short &Imm) {
1777 return isIntS16Immediate(Op.getNode(), Imm);
1778 }
1779
1780 /// SelectAddressRegReg - Given the specified address, check to see if it
1781 /// can be represented as an indexed [r+r] operation. Returns false if it
1782 /// can be more efficiently represented with [r+imm].
1783 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
1784 SDValue &Index,
1785 SelectionDAG &DAG) const {
1786 short imm = 0;
1787 if (N.getOpcode() == ISD::ADD) {
1788 if (isIntS16Immediate(N.getOperand(1), imm))
1789 return false; // r+i
1790 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
1791 return false; // r+i
1792
1793 Base = N.getOperand(0);
1794 Index = N.getOperand(1);
1795 return true;
1796 } else if (N.getOpcode() == ISD::OR) {
1797 if (isIntS16Immediate(N.getOperand(1), imm))
1798 return false; // r+i can fold it if we can.
1799
1800 // If this is an or of disjoint bitfields, we can codegen this as an add
1801 // (for better address arithmetic) if the LHS and RHS of the OR are provably
1802 // disjoint.
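    // Example (a sketch of the intent, with made-up value names): for an
    // address of the form (or %base, %idx) where computeKnownBits proves that
    // no bit position can be set in both operands, the OR behaves exactly
    // like an ADD with no carries, so it is safe to hand back the indexed
    // [r+r] form with Base = %base and Index = %idx.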
1803 APInt LHSKnownZero, LHSKnownOne; 1804 APInt RHSKnownZero, RHSKnownOne; 1805 DAG.computeKnownBits(N.getOperand(0), 1806 LHSKnownZero, LHSKnownOne); 1807 1808 if (LHSKnownZero.getBoolValue()) { 1809 DAG.computeKnownBits(N.getOperand(1), 1810 RHSKnownZero, RHSKnownOne); 1811 // If all of the bits are known zero on the LHS or RHS, the add won't 1812 // carry. 1813 if (~(LHSKnownZero | RHSKnownZero) == 0) { 1814 Base = N.getOperand(0); 1815 Index = N.getOperand(1); 1816 return true; 1817 } 1818 } 1819 } 1820 1821 return false; 1822 } 1823 1824 // If we happen to be doing an i64 load or store into a stack slot that has 1825 // less than a 4-byte alignment, then the frame-index elimination may need to 1826 // use an indexed load or store instruction (because the offset may not be a 1827 // multiple of 4). The extra register needed to hold the offset comes from the 1828 // register scavenger, and it is possible that the scavenger will need to use 1829 // an emergency spill slot. As a result, we need to make sure that a spill slot 1830 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned 1831 // stack slot. 1832 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { 1833 // FIXME: This does not handle the LWA case. 1834 if (VT != MVT::i64) 1835 return; 1836 1837 // NOTE: We'll exclude negative FIs here, which come from argument 1838 // lowering, because there are no known test cases triggering this problem 1839 // using packed structures (or similar). We can remove this exclusion if 1840 // we find such a test case. The reason why this is so test-case driven is 1841 // because this entire 'fixup' is only to prevent crashes (from the 1842 // register scavenger) on not-really-valid inputs. For example, if we have: 1843 // %a = alloca i1 1844 // %b = bitcast i1* %a to i64* 1845 // store i64* a, i64 b 1846 // then the store should really be marked as 'align 1', but is not. If it 1847 // were marked as 'align 1' then the indexed form would have been 1848 // instruction-selected initially, and the problem this 'fixup' is preventing 1849 // won't happen regardless. 1850 if (FrameIdx < 0) 1851 return; 1852 1853 MachineFunction &MF = DAG.getMachineFunction(); 1854 MachineFrameInfo &MFI = MF.getFrameInfo(); 1855 1856 unsigned Align = MFI.getObjectAlignment(FrameIdx); 1857 if (Align >= 4) 1858 return; 1859 1860 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1861 FuncInfo->setHasNonRISpills(); 1862 } 1863 1864 /// Returns true if the address N can be represented by a base register plus 1865 /// a signed 16-bit displacement [r+imm], and if it is not better 1866 /// represented as reg+reg. If Aligned is true, only accept displacements 1867 /// suitable for STD and friends, i.e. multiples of 4. 1868 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 1869 SDValue &Base, 1870 SelectionDAG &DAG, 1871 bool Aligned) const { 1872 // FIXME dl should come from parent load or store, not from address 1873 SDLoc dl(N); 1874 // If this can be more profitably realized as r+r, fail. 
1875 if (SelectAddressRegReg(N, Disp, Base, DAG)) 1876 return false; 1877 1878 if (N.getOpcode() == ISD::ADD) { 1879 short imm = 0; 1880 if (isIntS16Immediate(N.getOperand(1), imm) && 1881 (!Aligned || (imm & 3) == 0)) { 1882 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1883 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1884 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1885 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1886 } else { 1887 Base = N.getOperand(0); 1888 } 1889 return true; // [r+i] 1890 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 1891 // Match LOAD (ADD (X, Lo(G))). 1892 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 1893 && "Cannot handle constant offsets yet!"); 1894 Disp = N.getOperand(1).getOperand(0); // The global address. 1895 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 1896 Disp.getOpcode() == ISD::TargetGlobalTLSAddress || 1897 Disp.getOpcode() == ISD::TargetConstantPool || 1898 Disp.getOpcode() == ISD::TargetJumpTable); 1899 Base = N.getOperand(0); 1900 return true; // [&g+r] 1901 } 1902 } else if (N.getOpcode() == ISD::OR) { 1903 short imm = 0; 1904 if (isIntS16Immediate(N.getOperand(1), imm) && 1905 (!Aligned || (imm & 3) == 0)) { 1906 // If this is an or of disjoint bitfields, we can codegen this as an add 1907 // (for better address arithmetic) if the LHS and RHS of the OR are 1908 // provably disjoint. 1909 APInt LHSKnownZero, LHSKnownOne; 1910 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 1911 1912 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 1913 // If all of the bits are known zero on the LHS or RHS, the add won't 1914 // carry. 1915 if (FrameIndexSDNode *FI = 1916 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1917 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1918 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); 1919 } else { 1920 Base = N.getOperand(0); 1921 } 1922 Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); 1923 return true; 1924 } 1925 } 1926 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1927 // Loading from a constant address. 1928 1929 // If this address fits entirely in a 16-bit sext immediate field, codegen 1930 // this as "d, 0" 1931 short Imm; 1932 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { 1933 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); 1934 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1935 CN->getValueType(0)); 1936 return true; 1937 } 1938 1939 // Handle 32-bit sext immediates with LIS + addr mode. 1940 if ((CN->getValueType(0) == MVT::i32 || 1941 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && 1942 (!Aligned || (CN->getZExtValue() & 3) == 0)) { 1943 int Addr = (int)CN->getZExtValue(); 1944 1945 // Otherwise, break this down into an LIS + disp. 1946 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); 1947 1948 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, 1949 MVT::i32); 1950 unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8;
1951 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
1952 return true;
1953 }
1954 }
1955
1956 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
1957 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
1958 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1959 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1960 } else
1961 Base = N;
1962 return true; // [r+0]
1963 }
1964
1965 /// SelectAddressRegRegOnly - Given the specified address, force it to be
1966 /// represented as an indexed [r+r] operation.
1967 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
1968 SDValue &Index,
1969 SelectionDAG &DAG) const {
1970 // Check to see if we can easily represent this as an [r+r] address. This
1971 // will fail if it thinks that the address is more profitably represented as
1972 // reg+imm, e.g. where imm = 0.
1973 if (SelectAddressRegReg(N, Base, Index, DAG))
1974 return true;
1975
1976 // If the operand is an addition, always emit this as [r+r], since this is
1977 // better (for code size, and execution, as the memop does the add for free)
1978 // than emitting an explicit add.
1979 if (N.getOpcode() == ISD::ADD) {
1980 Base = N.getOperand(0);
1981 Index = N.getOperand(1);
1982 return true;
1983 }
1984
1985 // Otherwise, do it the hard way, using R0 as the base register.
1986 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1987 N.getValueType());
1988 Index = N;
1989 return true;
1990 }
1991
1992 /// getPreIndexedAddressParts - Returns true if the node's address can be
1993 /// legally represented as a pre-indexed load / store address, and if so
1994 /// returns the base pointer, offset, and addressing mode by reference.
1995 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1996 SDValue &Offset,
1997 ISD::MemIndexedMode &AM,
1998 SelectionDAG &DAG) const {
1999 if (DisablePPCPreinc) return false;
2000
2001 bool isLoad = true;
2002 SDValue Ptr;
2003 EVT VT;
2004 unsigned Alignment;
2005 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2006 Ptr = LD->getBasePtr();
2007 VT = LD->getMemoryVT();
2008 Alignment = LD->getAlignment();
2009 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2010 Ptr = ST->getBasePtr();
2011 VT = ST->getMemoryVT();
2012 Alignment = ST->getAlignment();
2013 isLoad = false;
2014 } else
2015 return false;
2016
2017 // PowerPC doesn't have preinc load/store instructions for vectors (except
2018 // for QPX, which does have preinc r+r forms).
2019 if (VT.isVector()) {
2020 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2021 return false;
2022 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2023 AM = ISD::PRE_INC;
2024 return true;
2025 }
2026 }
2027
2028 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2029
2030 // Common code will reject creating a pre-inc form if the base pointer
2031 // is a frame index, or if N is a store and the base pointer is either
2032 // the same as or a predecessor of the value being stored. Check for
2033 // those situations here, and try with swapped Base/Offset instead.
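    // (Note: this swap is sound because the [r+r] computation is symmetric in
    // Base and Offset; e.g. if Base came back as a FrameIndex while Offset is
    // an ordinary register, presenting the register as the base lets the
    // common code accept the pre-increment form.)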
2034 bool Swap = false; 2035 2036 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) 2037 Swap = true; 2038 else if (!isLoad) { 2039 SDValue Val = cast<StoreSDNode>(N)->getValue(); 2040 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) 2041 Swap = true; 2042 } 2043 2044 if (Swap) 2045 std::swap(Base, Offset); 2046 2047 AM = ISD::PRE_INC; 2048 return true; 2049 } 2050 2051 // LDU/STU can only handle immediates that are a multiple of 4. 2052 if (VT != MVT::i64) { 2053 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) 2054 return false; 2055 } else { 2056 // LDU/STU need an address with at least 4-byte alignment. 2057 if (Alignment < 4) 2058 return false; 2059 2060 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) 2061 return false; 2062 } 2063 2064 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 2065 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 2066 // sext i32 to i64 when addr mode is r+i. 2067 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 2068 LD->getExtensionType() == ISD::SEXTLOAD && 2069 isa<ConstantSDNode>(Offset)) 2070 return false; 2071 } 2072 2073 AM = ISD::PRE_INC; 2074 return true; 2075 } 2076 2077 //===----------------------------------------------------------------------===// 2078 // LowerOperation implementation 2079 //===----------------------------------------------------------------------===// 2080 2081 /// Return true if we should reference labels using a PICBase, set the HiOpFlags 2082 /// and LoOpFlags to the target MO flags. 2083 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, 2084 unsigned &HiOpFlags, unsigned &LoOpFlags, 2085 const GlobalValue *GV = nullptr) { 2086 HiOpFlags = PPCII::MO_HA; 2087 LoOpFlags = PPCII::MO_LO; 2088 2089 // Don't use the pic base if not in PIC relocation model. 2090 if (IsPIC) { 2091 HiOpFlags |= PPCII::MO_PIC_FLAG; 2092 LoOpFlags |= PPCII::MO_PIC_FLAG; 2093 } 2094 2095 // If this is a reference to a global value that requires a non-lazy-ptr, make 2096 // sure that instruction lowering adds it. 2097 if (GV && Subtarget.hasLazyResolverStub(GV)) { 2098 HiOpFlags |= PPCII::MO_NLP_FLAG; 2099 LoOpFlags |= PPCII::MO_NLP_FLAG; 2100 2101 if (GV->hasHiddenVisibility()) { 2102 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2103 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 2104 } 2105 } 2106 } 2107 2108 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 2109 SelectionDAG &DAG) { 2110 SDLoc DL(HiPart); 2111 EVT PtrVT = HiPart.getValueType(); 2112 SDValue Zero = DAG.getConstant(0, DL, PtrVT); 2113 2114 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 2115 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 2116 2117 // With PIC, the first instruction is actually "GR+hi(&G)". 2118 if (isPIC) 2119 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 2120 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 2121 2122 // Generate non-pic code that has direct accesses to the constant pool. 2123 // The address of the global is just (hi(&g)+lo(&g)). 2124 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 2125 } 2126 2127 static void setUsesTOCBasePtr(MachineFunction &MF) { 2128 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2129 FuncInfo->setUsesTOCBasePtr(); 2130 } 2131 2132 static void setUsesTOCBasePtr(SelectionDAG &DAG) { 2133 setUsesTOCBasePtr(DAG.getMachineFunction()); 2134 } 2135 2136 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, 2137 SDValue GA) { 2138 EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; 2139 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : 2140 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); 2141 2142 SDValue Ops[] = { GA, Reg }; 2143 return DAG.getMemIntrinsicNode( 2144 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, 2145 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, 2146 false, 0); 2147 } 2148 2149 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 2150 SelectionDAG &DAG) const { 2151 EVT PtrVT = Op.getValueType(); 2152 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2153 const Constant *C = CP->getConstVal(); 2154 2155 // 64-bit SVR4 ABI code is always position-independent. 2156 // The actual address of the GlobalValue is stored in the TOC. 2157 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2158 setUsesTOCBasePtr(DAG); 2159 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 2160 return getTOCEntry(DAG, SDLoc(CP), true, GA); 2161 } 2162 2163 unsigned MOHiFlag, MOLoFlag; 2164 bool IsPIC = isPositionIndependent(); 2165 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2166 2167 if (IsPIC && Subtarget.isSVR4ABI()) { 2168 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 2169 PPCII::MO_PIC_FLAG); 2170 return getTOCEntry(DAG, SDLoc(CP), false, GA); 2171 } 2172 2173 SDValue CPIHi = 2174 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 2175 SDValue CPILo = 2176 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 2177 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); 2178 } 2179 2180 // For 64-bit PowerPC, prefer the more compact relative encodings. 2181 // This trades 32 bits per jump table entry for one or two instructions 2182 // on the jump site. 2183 unsigned PPCTargetLowering::getJumpTableEncoding() const { 2184 if (isJumpTableRelative()) 2185 return MachineJumpTableInfo::EK_LabelDifference32; 2186 2187 return TargetLowering::getJumpTableEncoding(); 2188 } 2189 2190 bool PPCTargetLowering::isJumpTableRelative() const { 2191 if (Subtarget.isPPC64()) 2192 return true; 2193 return TargetLowering::isJumpTableRelative(); 2194 } 2195 2196 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table, 2197 SelectionDAG &DAG) const { 2198 if (!Subtarget.isPPC64()) 2199 return TargetLowering::getPICJumpTableRelocBase(Table, DAG); 2200 2201 switch (getTargetMachine().getCodeModel()) { 2202 case CodeModel::Default: 2203 case CodeModel::Small: 2204 case CodeModel::Medium: 2205 return TargetLowering::getPICJumpTableRelocBase(Table, DAG); 2206 default: 2207 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(), 2208 getPointerTy(DAG.getDataLayout())); 2209 } 2210 } 2211 2212 const MCExpr * 2213 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 2214 unsigned JTI, 2215 MCContext &Ctx) const { 2216 if (!Subtarget.isPPC64()) 2217 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); 2218 2219 switch (getTargetMachine().getCodeModel()) { 2220 case CodeModel::Default: 2221 case CodeModel::Small: 2222 case CodeModel::Medium: 2223 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); 2224 default: 2225 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); 2226 } 2227 } 2228 2229 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 2230 EVT PtrVT = Op.getValueType(); 2231 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2232 2233 // 64-bit SVR4 ABI code is always position-independent. 
2234 // The actual address of the GlobalValue is stored in the TOC. 2235 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2236 setUsesTOCBasePtr(DAG); 2237 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 2238 return getTOCEntry(DAG, SDLoc(JT), true, GA); 2239 } 2240 2241 unsigned MOHiFlag, MOLoFlag; 2242 bool IsPIC = isPositionIndependent(); 2243 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2244 2245 if (IsPIC && Subtarget.isSVR4ABI()) { 2246 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 2247 PPCII::MO_PIC_FLAG); 2248 return getTOCEntry(DAG, SDLoc(GA), false, GA); 2249 } 2250 2251 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 2252 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 2253 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); 2254 } 2255 2256 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 2257 SelectionDAG &DAG) const { 2258 EVT PtrVT = Op.getValueType(); 2259 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); 2260 const BlockAddress *BA = BASDN->getBlockAddress(); 2261 2262 // 64-bit SVR4 ABI code is always position-independent. 2263 // The actual BlockAddress is stored in the TOC. 2264 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2265 setUsesTOCBasePtr(DAG); 2266 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); 2267 return getTOCEntry(DAG, SDLoc(BASDN), true, GA); 2268 } 2269 2270 unsigned MOHiFlag, MOLoFlag; 2271 bool IsPIC = isPositionIndependent(); 2272 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); 2273 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 2274 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 2275 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); 2276 } 2277 2278 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 2279 SelectionDAG &DAG) const { 2280 2281 // FIXME: TLS addresses currently use medium model code sequences, 2282 // which is the most useful form. Eventually support for small and 2283 // large models could be added if users need it, at the cost of 2284 // additional complexity. 2285 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2286 if (DAG.getTarget().Options.EmulatedTLS) 2287 return LowerToTLSEmulatedModel(GA, DAG); 2288 2289 SDLoc dl(GA); 2290 const GlobalValue *GV = GA->getGlobal(); 2291 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2292 bool is64bit = Subtarget.isPPC64(); 2293 const Module *M = DAG.getMachineFunction().getFunction()->getParent(); 2294 PICLevel::Level picLevel = M->getPICLevel(); 2295 2296 TLSModel::Model Model = getTargetMachine().getTLSModel(GV); 2297 2298 if (Model == TLSModel::LocalExec) { 2299 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2300 PPCII::MO_TPREL_HA); 2301 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2302 PPCII::MO_TPREL_LO); 2303 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 2304 is64bit ? 
MVT::i64 : MVT::i32); 2305 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 2306 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 2307 } 2308 2309 if (Model == TLSModel::InitialExec) { 2310 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2311 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 2312 PPCII::MO_TLS); 2313 SDValue GOTPtr; 2314 if (is64bit) { 2315 setUsesTOCBasePtr(DAG); 2316 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2317 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 2318 PtrVT, GOTReg, TGA); 2319 } else 2320 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); 2321 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 2322 PtrVT, TGA, GOTPtr); 2323 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 2324 } 2325 2326 if (Model == TLSModel::GeneralDynamic) { 2327 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2328 SDValue GOTPtr; 2329 if (is64bit) { 2330 setUsesTOCBasePtr(DAG); 2331 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2332 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 2333 GOTReg, TGA); 2334 } else { 2335 if (picLevel == PICLevel::SmallPIC) 2336 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2337 else 2338 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2339 } 2340 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, 2341 GOTPtr, TGA, TGA); 2342 } 2343 2344 if (Model == TLSModel::LocalDynamic) { 2345 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 2346 SDValue GOTPtr; 2347 if (is64bit) { 2348 setUsesTOCBasePtr(DAG); 2349 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 2350 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 2351 GOTReg, TGA); 2352 } else { 2353 if (picLevel == PICLevel::SmallPIC) 2354 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); 2355 else 2356 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); 2357 } 2358 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, 2359 PtrVT, GOTPtr, TGA, TGA); 2360 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, 2361 PtrVT, TLSAddr, TGA); 2362 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 2363 } 2364 2365 llvm_unreachable("Unknown TLS model!"); 2366 } 2367 2368 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 2369 SelectionDAG &DAG) const { 2370 EVT PtrVT = Op.getValueType(); 2371 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 2372 SDLoc DL(GSDN); 2373 const GlobalValue *GV = GSDN->getGlobal(); 2374 2375 // 64-bit SVR4 ABI code is always position-independent. 2376 // The actual address of the GlobalValue is stored in the TOC. 
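  // (For illustration only, not computed here: on 64-bit ELF this typically
  // becomes a load of the address from its TOC entry, e.g. "ld 3, sym@toc(2)"
  // for the small code model, or an "addis 3, 2, sym@toc@ha" followed by
  // "ld 3, sym@toc@l(3)" for the medium and large code models.)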
2377 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 2378 setUsesTOCBasePtr(DAG); 2379 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 2380 return getTOCEntry(DAG, DL, true, GA); 2381 } 2382 2383 unsigned MOHiFlag, MOLoFlag; 2384 bool IsPIC = isPositionIndependent(); 2385 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); 2386 2387 if (IsPIC && Subtarget.isSVR4ABI()) { 2388 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 2389 GSDN->getOffset(), 2390 PPCII::MO_PIC_FLAG); 2391 return getTOCEntry(DAG, DL, false, GA); 2392 } 2393 2394 SDValue GAHi = 2395 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 2396 SDValue GALo = 2397 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 2398 2399 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); 2400 2401 // If the global reference is actually to a non-lazy-pointer, we have to do an 2402 // extra load to get the address of the global. 2403 if (MOHiFlag & PPCII::MO_NLP_FLAG) 2404 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 2405 return Ptr; 2406 } 2407 2408 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 2409 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2410 SDLoc dl(Op); 2411 2412 if (Op.getValueType() == MVT::v2i64) { 2413 // When the operands themselves are v2i64 values, we need to do something 2414 // special because VSX has no underlying comparison operations for these. 2415 if (Op.getOperand(0).getValueType() == MVT::v2i64) { 2416 // Equality can be handled by casting to the legal type for Altivec 2417 // comparisons, everything else needs to be expanded. 2418 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2419 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, 2420 DAG.getSetCC(dl, MVT::v4i32, 2421 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), 2422 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), 2423 CC)); 2424 } 2425 2426 return SDValue(); 2427 } 2428 2429 // We handle most of these in the usual way. 2430 return Op; 2431 } 2432 2433 // If we're comparing for equality to zero, expose the fact that this is 2434 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can 2435 // fold the new nodes. 2436 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG)) 2437 return V; 2438 2439 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 2440 // Leave comparisons against 0 and -1 alone for now, since they're usually 2441 // optimized. FIXME: revisit this when we can custom lower all setcc 2442 // optimizations. 2443 if (C->isAllOnesValue() || C->isNullValue()) 2444 return SDValue(); 2445 } 2446 2447 // If we have an integer seteq/setne, turn it into a compare against zero 2448 // by xor'ing the rhs with the lhs, which is faster than setting a 2449 // condition register, reading it back out, and masking the correct bit. The 2450 // normal approach here uses sub to do this instead of xor. Using xor exposes 2451 // the result to other bit-twiddling opportunities. 
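  // For example, (seteq i32 %a, %b) becomes (seteq (xor %a, %b), 0); the
  // compare-with-zero form can then be folded further, e.g. into the
  // cntlzw/srwi idiom mentioned above, since (x == 0) is (cntlzw(x) >> 5).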
2452 EVT LHSVT = Op.getOperand(0).getValueType(); 2453 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 2454 EVT VT = Op.getValueType(); 2455 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 2456 Op.getOperand(1)); 2457 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); 2458 } 2459 return SDValue(); 2460 } 2461 2462 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { 2463 SDNode *Node = Op.getNode(); 2464 EVT VT = Node->getValueType(0); 2465 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2466 SDValue InChain = Node->getOperand(0); 2467 SDValue VAListPtr = Node->getOperand(1); 2468 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2469 SDLoc dl(Node); 2470 2471 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 2472 2473 // gpr_index 2474 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2475 VAListPtr, MachinePointerInfo(SV), MVT::i8); 2476 InChain = GprIndex.getValue(1); 2477 2478 if (VT == MVT::i64) { 2479 // Check if GprIndex is even 2480 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 2481 DAG.getConstant(1, dl, MVT::i32)); 2482 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 2483 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); 2484 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 2485 DAG.getConstant(1, dl, MVT::i32)); 2486 // Align GprIndex to be even if it isn't 2487 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 2488 GprIndex); 2489 } 2490 2491 // fpr index is 1 byte after gpr 2492 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2493 DAG.getConstant(1, dl, MVT::i32)); 2494 2495 // fpr 2496 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 2497 FprPtr, MachinePointerInfo(SV), MVT::i8); 2498 InChain = FprIndex.getValue(1); 2499 2500 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2501 DAG.getConstant(8, dl, MVT::i32)); 2502 2503 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 2504 DAG.getConstant(4, dl, MVT::i32)); 2505 2506 // areas 2507 SDValue OverflowArea = 2508 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); 2509 InChain = OverflowArea.getValue(1); 2510 2511 SDValue RegSaveArea = 2512 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); 2513 InChain = RegSaveArea.getValue(1); 2514 2515 // select overflow_area if index > 8 2516 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 2517 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); 2518 2519 // adjustment constant gpr_index * 4/8 2520 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 2521 VT.isInteger() ? GprIndex : FprIndex, 2522 DAG.getConstant(VT.isInteger() ? 4 : 8, dl, 2523 MVT::i32)); 2524 2525 // OurReg = RegSaveArea + RegConstant 2526 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 2527 RegConstant); 2528 2529 // Floating types are 32 bytes into RegSaveArea 2530 if (VT.isFloatingPoint()) 2531 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 2532 DAG.getConstant(32, dl, MVT::i32)); 2533 2534 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 2535 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 2536 VT.isInteger() ? GprIndex : FprIndex, 2537 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, 2538 MVT::i32)); 2539 2540 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 2541 VT.isInteger() ? 
VAListPtr : FprPtr, 2542 MachinePointerInfo(SV), MVT::i8); 2543 2544 // determine if we should load from reg_save_area or overflow_area 2545 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 2546 2547 // increase overflow_area by 4/8 if gpr/fpr > 8 2548 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 2549 DAG.getConstant(VT.isInteger() ? 4 : 8, 2550 dl, MVT::i32)); 2551 2552 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 2553 OverflowAreaPlusN); 2554 2555 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, 2556 MachinePointerInfo(), MVT::i32); 2557 2558 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); 2559 } 2560 2561 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { 2562 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 2563 2564 // We have to copy the entire va_list struct: 2565 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 2566 return DAG.getMemcpy(Op.getOperand(0), Op, 2567 Op.getOperand(1), Op.getOperand(2), 2568 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, 2569 false, MachinePointerInfo(), MachinePointerInfo()); 2570 } 2571 2572 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 2573 SelectionDAG &DAG) const { 2574 return Op.getOperand(0); 2575 } 2576 2577 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 2578 SelectionDAG &DAG) const { 2579 SDValue Chain = Op.getOperand(0); 2580 SDValue Trmp = Op.getOperand(1); // trampoline 2581 SDValue FPtr = Op.getOperand(2); // nested function 2582 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 2583 SDLoc dl(Op); 2584 2585 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2586 bool isPPC64 = (PtrVT == MVT::i64); 2587 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 2588 2589 TargetLowering::ArgListTy Args; 2590 TargetLowering::ArgListEntry Entry; 2591 2592 Entry.Ty = IntPtrTy; 2593 Entry.Node = Trmp; Args.push_back(Entry); 2594 2595 // TrampSize == (isPPC64 ? 48 : 40); 2596 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, 2597 isPPC64 ? MVT::i64 : MVT::i32); 2598 Args.push_back(Entry); 2599 2600 Entry.Node = FPtr; Args.push_back(Entry); 2601 Entry.Node = Nest; Args.push_back(Entry); 2602 2603 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 2604 TargetLowering::CallLoweringInfo CLI(DAG); 2605 CLI.setDebugLoc(dl).setChain(Chain) 2606 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), 2607 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 2608 std::move(Args)); 2609 2610 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2611 return CallResult.second; 2612 } 2613 2614 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { 2615 MachineFunction &MF = DAG.getMachineFunction(); 2616 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2617 EVT PtrVT = getPointerTy(MF.getDataLayout()); 2618 2619 SDLoc dl(Op); 2620 2621 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 2622 // vastart just stores the address of the VarArgsFrameIndex slot into the 2623 // memory location argument. 2624 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2625 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2626 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2627 MachinePointerInfo(SV)); 2628 } 2629 2630 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 
2631 // We suppose the given va_list is already allocated. 2632 // 2633 // typedef struct { 2634 // char gpr; /* index into the array of 8 GPRs 2635 // * stored in the register save area 2636 // * gpr=0 corresponds to r3, 2637 // * gpr=1 to r4, etc. 2638 // */ 2639 // char fpr; /* index into the array of 8 FPRs 2640 // * stored in the register save area 2641 // * fpr=0 corresponds to f1, 2642 // * fpr=1 to f2, etc. 2643 // */ 2644 // char *overflow_arg_area; 2645 // /* location on stack that holds 2646 // * the next overflow argument 2647 // */ 2648 // char *reg_save_area; 2649 // /* where r3:r10 and f1:f8 (if saved) 2650 // * are stored 2651 // */ 2652 // } va_list[1]; 2653 2654 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); 2655 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); 2656 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 2657 PtrVT); 2658 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 2659 PtrVT); 2660 2661 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 2662 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); 2663 2664 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 2665 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); 2666 2667 uint64_t FPROffset = 1; 2668 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); 2669 2670 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2671 2672 // Store first byte : number of int regs 2673 SDValue firstStore = 2674 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), 2675 MachinePointerInfo(SV), MVT::i8); 2676 uint64_t nextOffset = FPROffset; 2677 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 2678 ConstFPROffset); 2679 2680 // Store second byte : number of float regs 2681 SDValue secondStore = 2682 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 2683 MachinePointerInfo(SV, nextOffset), MVT::i8); 2684 nextOffset += StackOffset; 2685 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 2686 2687 // Store second word : arguments given on stack 2688 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 2689 MachinePointerInfo(SV, nextOffset)); 2690 nextOffset += FrameOffset; 2691 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 2692 2693 // Store third word : arguments given in registers 2694 return DAG.getStore(thirdStore, dl, FR, nextPtr, 2695 MachinePointerInfo(SV, nextOffset)); 2696 } 2697 2698 #include "PPCGenCallingConv.inc" 2699 2700 // Function whose sole purpose is to kill compiler warnings 2701 // stemming from unused functions included from PPCGenCallingConv.inc. 2702 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { 2703 return Flag ? 
CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; 2704 } 2705 2706 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 2707 CCValAssign::LocInfo &LocInfo, 2708 ISD::ArgFlagsTy &ArgFlags, 2709 CCState &State) { 2710 return true; 2711 } 2712 2713 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 2714 MVT &LocVT, 2715 CCValAssign::LocInfo &LocInfo, 2716 ISD::ArgFlagsTy &ArgFlags, 2717 CCState &State) { 2718 static const MCPhysReg ArgRegs[] = { 2719 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2720 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2721 }; 2722 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2723 2724 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2725 2726 // Skip one register if the first unallocated register has an even register 2727 // number and there are still argument registers available which have not been 2728 // allocated yet. RegNum is actually an index into ArgRegs, which means we 2729 // need to skip a register if RegNum is odd. 2730 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 2731 State.AllocateReg(ArgRegs[RegNum]); 2732 } 2733 2734 // Always return false here, as this function only makes sure that the first 2735 // unallocated register has an odd register number and does not actually 2736 // allocate a register for the current argument. 2737 return false; 2738 } 2739 2740 bool 2741 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, 2742 MVT &LocVT, 2743 CCValAssign::LocInfo &LocInfo, 2744 ISD::ArgFlagsTy &ArgFlags, 2745 CCState &State) { 2746 static const MCPhysReg ArgRegs[] = { 2747 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2748 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2749 }; 2750 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2751 2752 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2753 int RegsLeft = NumArgRegs - RegNum; 2754 2755 // Skip if there is not enough registers left for long double type (4 gpr regs 2756 // in soft float mode) and put long double argument on the stack. 2757 if (RegNum != NumArgRegs && RegsLeft < 4) { 2758 for (int i = 0; i < RegsLeft; i++) { 2759 State.AllocateReg(ArgRegs[RegNum + i]); 2760 } 2761 } 2762 2763 return false; 2764 } 2765 2766 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 2767 MVT &LocVT, 2768 CCValAssign::LocInfo &LocInfo, 2769 ISD::ArgFlagsTy &ArgFlags, 2770 CCState &State) { 2771 static const MCPhysReg ArgRegs[] = { 2772 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2773 PPC::F8 2774 }; 2775 2776 const unsigned NumArgRegs = array_lengthof(ArgRegs); 2777 2778 unsigned RegNum = State.getFirstUnallocated(ArgRegs); 2779 2780 // If there is only one Floating-point register left we need to put both f64 2781 // values of a split ppc_fp128 value on the stack. 2782 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 2783 State.AllocateReg(ArgRegs[RegNum]); 2784 } 2785 2786 // Always return false here, as this function only makes sure that the two f64 2787 // values a ppc_fp128 value is split into are both passed in registers or both 2788 // passed on the stack and does not actually allocate a register for the 2789 // current argument. 2790 return false; 2791 } 2792 2793 /// FPR - The set of FP registers that should be allocated for arguments, 2794 /// on Darwin. 
2795 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, 2796 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, 2797 PPC::F11, PPC::F12, PPC::F13}; 2798 2799 /// QFPR - The set of QPX registers that should be allocated for arguments. 2800 static const MCPhysReg QFPR[] = { 2801 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, 2802 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; 2803 2804 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 2805 /// the stack. 2806 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2807 unsigned PtrByteSize) { 2808 unsigned ArgSize = ArgVT.getStoreSize(); 2809 if (Flags.isByVal()) 2810 ArgSize = Flags.getByValSize(); 2811 2812 // Round up to multiples of the pointer size, except for array members, 2813 // which are always packed. 2814 if (!Flags.isInConsecutiveRegs()) 2815 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2816 2817 return ArgSize; 2818 } 2819 2820 /// CalculateStackSlotAlignment - Calculates the alignment of this argument 2821 /// on the stack. 2822 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 2823 ISD::ArgFlagsTy Flags, 2824 unsigned PtrByteSize) { 2825 unsigned Align = PtrByteSize; 2826 2827 // Altivec parameters are padded to a 16 byte boundary. 2828 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2829 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2830 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2831 ArgVT == MVT::v1i128) 2832 Align = 16; 2833 // QPX vector types stored in double-precision are padded to a 32 byte 2834 // boundary. 2835 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) 2836 Align = 32; 2837 2838 // ByVal parameters are aligned as requested. 2839 if (Flags.isByVal()) { 2840 unsigned BVAlign = Flags.getByValAlign(); 2841 if (BVAlign > PtrByteSize) { 2842 if (BVAlign % PtrByteSize != 0) 2843 llvm_unreachable( 2844 "ByVal alignment is not a multiple of the pointer size"); 2845 2846 Align = BVAlign; 2847 } 2848 } 2849 2850 // Array members are always packed to their original alignment. 2851 if (Flags.isInConsecutiveRegs()) { 2852 // If the array member was split into multiple registers, the first 2853 // needs to be aligned to the size of the full type. (Except for 2854 // ppcf128, which is only aligned as its f64 components.) 2855 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 2856 Align = OrigVT.getStoreSize(); 2857 else 2858 Align = ArgVT.getStoreSize(); 2859 } 2860 2861 return Align; 2862 } 2863 2864 /// CalculateStackSlotUsed - Return whether this argument will use its 2865 /// stack slot (instead of being passed in registers). ArgOffset, 2866 /// AvailableFPRs, and AvailableVRs must hold the current argument 2867 /// position, and will be updated to account for this argument. 2868 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 2869 ISD::ArgFlagsTy Flags, 2870 unsigned PtrByteSize, 2871 unsigned LinkageSize, 2872 unsigned ParamAreaSize, 2873 unsigned &ArgOffset, 2874 unsigned &AvailableFPRs, 2875 unsigned &AvailableVRs, bool HasQPX) { 2876 bool UseMemory = false; 2877 2878 // Respect alignment of argument on the stack. 2879 unsigned Align = 2880 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 2881 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2882 // If there's no space left in the argument save area, we must 2883 // use memory (this check also catches zero-sized arguments). 
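  // (Rough worked example using the usual 64-bit ELF numbers rather than
  // anything computed here: ParamAreaSize is normally 8 * PtrByteSize = 64
  // bytes, mirroring GPRs r3-r10, so a ninth pointer-sized argument starts at
  // ArgOffset == LinkageSize + 64 and is flagged as needing memory.)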
2884 if (ArgOffset >= LinkageSize + ParamAreaSize) 2885 UseMemory = true; 2886 2887 // Allocate argument on the stack. 2888 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2889 if (Flags.isInConsecutiveRegsLast()) 2890 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2891 // If we overran the argument save area, we must use memory 2892 // (this check catches arguments passed partially in memory) 2893 if (ArgOffset > LinkageSize + ParamAreaSize) 2894 UseMemory = true; 2895 2896 // However, if the argument is actually passed in an FPR or a VR, 2897 // we don't use memory after all. 2898 if (!Flags.isByVal()) { 2899 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || 2900 // QPX registers overlap with the scalar FP registers. 2901 (HasQPX && (ArgVT == MVT::v4f32 || 2902 ArgVT == MVT::v4f64 || 2903 ArgVT == MVT::v4i1))) 2904 if (AvailableFPRs > 0) { 2905 --AvailableFPRs; 2906 return false; 2907 } 2908 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2909 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2910 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || 2911 ArgVT == MVT::v1i128) 2912 if (AvailableVRs > 0) { 2913 --AvailableVRs; 2914 return false; 2915 } 2916 } 2917 2918 return UseMemory; 2919 } 2920 2921 /// EnsureStackAlignment - Round stack frame size up from NumBytes to 2922 /// ensure minimum alignment required for target. 2923 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, 2924 unsigned NumBytes) { 2925 unsigned TargetAlign = Lowering->getStackAlignment(); 2926 unsigned AlignMask = TargetAlign - 1; 2927 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2928 return NumBytes; 2929 } 2930 2931 SDValue PPCTargetLowering::LowerFormalArguments( 2932 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2933 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2934 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2935 if (Subtarget.isSVR4ABI()) { 2936 if (Subtarget.isPPC64()) 2937 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 2938 dl, DAG, InVals); 2939 else 2940 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 2941 dl, DAG, InVals); 2942 } else { 2943 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 2944 dl, DAG, InVals); 2945 } 2946 } 2947 2948 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( 2949 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2950 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2951 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2952 2953 // 32-bit SVR4 ABI Stack Frame Layout: 2954 // +-----------------------------------+ 2955 // +--> | Back chain | 2956 // | +-----------------------------------+ 2957 // | | Floating-point register save area | 2958 // | +-----------------------------------+ 2959 // | | General register save area | 2960 // | +-----------------------------------+ 2961 // | | CR save word | 2962 // | +-----------------------------------+ 2963 // | | VRSAVE save word | 2964 // | +-----------------------------------+ 2965 // | | Alignment padding | 2966 // | +-----------------------------------+ 2967 // | | Vector register save area | 2968 // | +-----------------------------------+ 2969 // | | Local variable space | 2970 // | +-----------------------------------+ 2971 // | | Parameter list area | 2972 // | +-----------------------------------+ 2973 // | | LR save word | 2974 // | +-----------------------------------+ 2975 // SP--> +--- | Back chain | 2976 // +-----------------------------------+ 
2977 // 2978 // Specifications: 2979 // System V Application Binary Interface PowerPC Processor Supplement 2980 // AltiVec Technology Programming Interface Manual 2981 2982 MachineFunction &MF = DAG.getMachineFunction(); 2983 MachineFrameInfo &MFI = MF.getFrameInfo(); 2984 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2985 2986 EVT PtrVT = getPointerTy(MF.getDataLayout()); 2987 // Potential tail calls could cause overwriting of argument stack slots. 2988 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2989 (CallConv == CallingConv::Fast)); 2990 unsigned PtrByteSize = 4; 2991 2992 // Assign locations to all of the incoming arguments. 2993 SmallVector<CCValAssign, 16> ArgLocs; 2994 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 2995 *DAG.getContext()); 2996 2997 // Reserve space for the linkage area on the stack. 2998 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 2999 CCInfo.AllocateStack(LinkageSize, PtrByteSize); 3000 if (useSoftFloat()) 3001 CCInfo.PreAnalyzeFormalArguments(Ins); 3002 3003 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 3004 CCInfo.clearWasPPCF128(); 3005 3006 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 3007 CCValAssign &VA = ArgLocs[i]; 3008 3009 // Arguments stored in registers. 3010 if (VA.isRegLoc()) { 3011 const TargetRegisterClass *RC; 3012 EVT ValVT = VA.getValVT(); 3013 3014 switch (ValVT.getSimpleVT().SimpleTy) { 3015 default: 3016 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 3017 case MVT::i1: 3018 case MVT::i32: 3019 RC = &PPC::GPRCRegClass; 3020 break; 3021 case MVT::f32: 3022 if (Subtarget.hasP8Vector()) 3023 RC = &PPC::VSSRCRegClass; 3024 else 3025 RC = &PPC::F4RCRegClass; 3026 break; 3027 case MVT::f64: 3028 if (Subtarget.hasVSX()) 3029 RC = &PPC::VSFRCRegClass; 3030 else 3031 RC = &PPC::F8RCRegClass; 3032 break; 3033 case MVT::v16i8: 3034 case MVT::v8i16: 3035 case MVT::v4i32: 3036 RC = &PPC::VRRCRegClass; 3037 break; 3038 case MVT::v4f32: 3039 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; 3040 break; 3041 case MVT::v2f64: 3042 case MVT::v2i64: 3043 RC = &PPC::VRRCRegClass; 3044 break; 3045 case MVT::v4f64: 3046 RC = &PPC::QFRCRegClass; 3047 break; 3048 case MVT::v4i1: 3049 RC = &PPC::QBRCRegClass; 3050 break; 3051 } 3052 3053 // Transform the arguments stored in physical registers into virtual ones. 3054 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 3055 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, 3056 ValVT == MVT::i1 ? MVT::i32 : ValVT); 3057 3058 if (ValVT == MVT::i1) 3059 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); 3060 3061 InVals.push_back(ArgValue); 3062 } else { 3063 // Argument stored in memory. 3064 assert(VA.isMemLoc()); 3065 3066 unsigned ArgSize = VA.getLocVT().getStoreSize(); 3067 int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(), 3068 isImmutable); 3069 3070 // Create load nodes to retrieve arguments from the stack. 3071 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3072 InVals.push_back( 3073 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); 3074 } 3075 } 3076 3077 // Assign locations to all of the incoming aggregate by value arguments. 3078 // Aggregates passed by value are stored in the local variable space of the 3079 // caller's stack frame, right above the parameter list area. 
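  // For illustration (a rough sketch of the intent, using a hypothetical
  // signature, not a restatement of the full ABI): given
  //   struct S { int v[5]; };
  //   void f(struct S s);
  // the front end marks the argument "byval"; the caller materializes a copy
  // of s in this local variable space and the callee sees its address like an
  // ordinary pointer argument. The locations computed below describe where
  // those copies live.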
3080 SmallVector<CCValAssign, 16> ByValArgLocs; 3081 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3082 ByValArgLocs, *DAG.getContext()); 3083 3084 // Reserve stack space for the allocations in CCInfo. 3085 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 3086 3087 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 3088 3089 // Area that is at least reserved in the caller of this function. 3090 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 3091 MinReservedArea = std::max(MinReservedArea, LinkageSize); 3092 3093 // Set the size that is at least reserved in caller of this function. Tail 3094 // call optimized function's reserved stack space needs to be aligned so that 3095 // taking the difference between two stack areas will result in an aligned 3096 // stack. 3097 MinReservedArea = 3098 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3099 FuncInfo->setMinReservedArea(MinReservedArea); 3100 3101 SmallVector<SDValue, 8> MemOps; 3102 3103 // If the function takes variable number of arguments, make a frame index for 3104 // the start of the first vararg value... for expansion of llvm.va_start. 3105 if (isVarArg) { 3106 static const MCPhysReg GPArgRegs[] = { 3107 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3108 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3109 }; 3110 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 3111 3112 static const MCPhysReg FPArgRegs[] = { 3113 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 3114 PPC::F8 3115 }; 3116 unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 3117 3118 if (useSoftFloat()) 3119 NumFPArgRegs = 0; 3120 3121 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); 3122 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); 3123 3124 // Make room for NumGPArgRegs and NumFPArgRegs. 3125 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 3126 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; 3127 3128 FuncInfo->setVarArgsStackOffset( 3129 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, 3130 CCInfo.getNextStackOffset(), true)); 3131 3132 FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false)); 3133 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3134 3135 // The fixed integer arguments of a variadic function are stored to the 3136 // VarArgsFrameIndex on the stack so that they may be loaded by 3137 // dereferencing the result of va_next. 3138 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 3139 // Get an existing live-in vreg, or add a new one. 3140 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 3141 if (!VReg) 3142 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 3143 3144 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3145 SDValue Store = 3146 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3147 MemOps.push_back(Store); 3148 // Increment the address by four for the next argument to store 3149 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); 3150 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3151 } 3152 3153 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 3154 // is set. 3155 // The double arguments are stored to the VarArgsFrameIndex 3156 // on the stack. 3157 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 3158 // Get an existing live-in vreg, or add a new one. 
3159 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 3160 if (!VReg) 3161 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 3162 3163 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 3164 SDValue Store = 3165 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3166 MemOps.push_back(Store); 3167 // Increment the address by eight for the next argument to store 3168 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, 3169 PtrVT); 3170 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3171 } 3172 } 3173 3174 if (!MemOps.empty()) 3175 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3176 3177 return Chain; 3178 } 3179 3180 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3181 // value to MVT::i64 and then truncate to the correct register size. 3182 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, 3183 EVT ObjectVT, SelectionDAG &DAG, 3184 SDValue ArgVal, 3185 const SDLoc &dl) const { 3186 if (Flags.isSExt()) 3187 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 3188 DAG.getValueType(ObjectVT)); 3189 else if (Flags.isZExt()) 3190 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 3191 DAG.getValueType(ObjectVT)); 3192 3193 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); 3194 } 3195 3196 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( 3197 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 3198 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 3199 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3200 // TODO: add description of PPC stack frame format, or at least some docs. 3201 // 3202 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3203 bool isLittleEndian = Subtarget.isLittleEndian(); 3204 MachineFunction &MF = DAG.getMachineFunction(); 3205 MachineFrameInfo &MFI = MF.getFrameInfo(); 3206 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3207 3208 assert(!(CallConv == CallingConv::Fast && isVarArg) && 3209 "fastcc not supported on varargs functions"); 3210 3211 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3212 // Potential tail calls could cause overwriting of argument stack slots. 3213 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3214 (CallConv == CallingConv::Fast)); 3215 unsigned PtrByteSize = 8; 3216 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3217 3218 static const MCPhysReg GPR[] = { 3219 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3220 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3221 }; 3222 static const MCPhysReg VR[] = { 3223 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3224 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3225 }; 3226 3227 const unsigned Num_GPR_Regs = array_lengthof(GPR); 3228 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; 3229 const unsigned Num_VR_Regs = array_lengthof(VR); 3230 const unsigned Num_QFPR_Regs = Num_FPR_Regs; 3231 3232 // Do a first pass over the arguments to determine whether the ABI 3233 // guarantees that our caller has allocated the parameter save area 3234 // on its stack frame. In the ELFv1 ABI, this is always the case; 3235 // in the ELFv2 ABI, it is true if this is a vararg function or if 3236 // any parameter is located in a stack slot. 
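  // For example (hypothetical signatures, using the register counts set up
  // below): under ELFv2,
  //   double f(double a, double b);
  // passes everything in registers, so a non-vararg caller of f need not
  // allocate the parameter save area at all; but
  //   long g(long a1, long a2, long a3, long a4,
  //          long a5, long a6, long a7, long a8, long a9);
  // has nine integer arguments and only eight GPRs (X3-X10), so the ninth
  // lands in a stack slot and the save area must exist.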
3237 3238 bool HasParameterArea = !isELFv2ABI || isVarArg; 3239 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; 3240 unsigned NumBytes = LinkageSize; 3241 unsigned AvailableFPRs = Num_FPR_Regs; 3242 unsigned AvailableVRs = Num_VR_Regs; 3243 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 3244 if (Ins[i].Flags.isNest()) 3245 continue; 3246 3247 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, 3248 PtrByteSize, LinkageSize, ParamAreaSize, 3249 NumBytes, AvailableFPRs, AvailableVRs, 3250 Subtarget.hasQPX())) 3251 HasParameterArea = true; 3252 } 3253 3254 // Add DAG nodes to load the arguments or copy them out of registers. On 3255 // entry to a function on PPC, the arguments start after the linkage area, 3256 // although the first ones are often in registers. 3257 3258 unsigned ArgOffset = LinkageSize; 3259 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3260 unsigned &QFPR_idx = FPR_idx; 3261 SmallVector<SDValue, 8> MemOps; 3262 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3263 unsigned CurArgIdx = 0; 3264 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3265 SDValue ArgVal; 3266 bool needsLoad = false; 3267 EVT ObjectVT = Ins[ArgNo].VT; 3268 EVT OrigVT = Ins[ArgNo].ArgVT; 3269 unsigned ObjSize = ObjectVT.getStoreSize(); 3270 unsigned ArgSize = ObjSize; 3271 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3272 if (Ins[ArgNo].isOrigArg()) { 3273 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3274 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3275 } 3276 // We re-align the argument offset for each argument, except when using the 3277 // fast calling convention, when we need to make sure we do that only when 3278 // we'll actually use a stack slot. 3279 unsigned CurArgOffset, Align; 3280 auto ComputeArgOffset = [&]() { 3281 /* Respect alignment of argument on the stack. */ 3282 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); 3283 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 3284 CurArgOffset = ArgOffset; 3285 }; 3286 3287 if (CallConv != CallingConv::Fast) { 3288 ComputeArgOffset(); 3289 3290 /* Compute GPR index associated with argument offset. */ 3291 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3292 GPR_idx = std::min(GPR_idx, Num_GPR_Regs); 3293 } 3294 3295 // FIXME the codegen can be much improved in some cases. 3296 // We do not have to keep everything in memory. 3297 if (Flags.isByVal()) { 3298 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3299 3300 if (CallConv == CallingConv::Fast) 3301 ComputeArgOffset(); 3302 3303 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3304 ObjSize = Flags.getByValSize(); 3305 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3306 // Empty aggregate parameters do not take up registers. Examples: 3307 // struct { } a; 3308 // union { } b; 3309 // int c[0]; 3310 // etc. However, we have to provide a place-holder in InVals, so 3311 // pretend we have an 8-byte item at the current address for that 3312 // purpose. 3313 if (!ObjSize) { 3314 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); 3315 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3316 InVals.push_back(FIN); 3317 continue; 3318 } 3319 3320 // Create a stack object covering all stack doublewords occupied 3321 // by the argument. 
If the argument is (fully or partially) on 3322 // the stack, or if the argument is fully in registers but the 3323 // caller has allocated the parameter save anyway, we can refer 3324 // directly to the caller's stack frame. Otherwise, create a 3325 // local copy in our own frame. 3326 int FI; 3327 if (HasParameterArea || 3328 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) 3329 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true); 3330 else 3331 FI = MFI.CreateStackObject(ArgSize, Align, false); 3332 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3333 3334 // Handle aggregates smaller than 8 bytes. 3335 if (ObjSize < PtrByteSize) { 3336 // The value of the object is its address, which differs from the 3337 // address of the enclosing doubleword on big-endian systems. 3338 SDValue Arg = FIN; 3339 if (!isLittleEndian) { 3340 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); 3341 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); 3342 } 3343 InVals.push_back(Arg); 3344 3345 if (GPR_idx != Num_GPR_Regs) { 3346 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3347 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3348 SDValue Store; 3349 3350 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 3351 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 3352 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 3353 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, 3354 MachinePointerInfo(&*FuncArg), ObjType); 3355 } else { 3356 // For sizes that don't fit a truncating store (3, 5, 6, 7), 3357 // store the whole register as-is to the parameter save area 3358 // slot. 3359 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3360 MachinePointerInfo(&*FuncArg)); 3361 } 3362 3363 MemOps.push_back(Store); 3364 } 3365 // Whether we copied from a register or not, advance the offset 3366 // into the parameter save area by a full doubleword. 3367 ArgOffset += PtrByteSize; 3368 continue; 3369 } 3370 3371 // The value of the object is its address, which is the address of 3372 // its first stack doubleword. 3373 InVals.push_back(FIN); 3374 3375 // Store whatever pieces of the object are in registers to memory. 3376 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3377 if (GPR_idx == Num_GPR_Regs) 3378 break; 3379 3380 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3381 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3382 SDValue Addr = FIN; 3383 if (j) { 3384 SDValue Off = DAG.getConstant(j, dl, PtrVT); 3385 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); 3386 } 3387 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, 3388 MachinePointerInfo(&*FuncArg, j)); 3389 MemOps.push_back(Store); 3390 ++GPR_idx; 3391 } 3392 ArgOffset += ArgSize; 3393 continue; 3394 } 3395 3396 switch (ObjectVT.getSimpleVT().SimpleTy) { 3397 default: llvm_unreachable("Unhandled argument type!"); 3398 case MVT::i1: 3399 case MVT::i32: 3400 case MVT::i64: 3401 if (Flags.isNest()) { 3402 // The 'nest' parameter, if any, is passed in R11. 3403 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); 3404 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3405 3406 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3407 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3408 3409 break; 3410 } 3411 3412 // These can be scalar arguments or elements of an integer array type 3413 // passed directly. Clang may use those instead of "byval" aggregate 3414 // types to avoid forcing arguments to memory unnecessarily. 
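      // For illustration (a hypothetical example of the coercion mentioned
      // above): Clang may lower
      //   struct Pair { long a, b; };
      //   void f(struct Pair p);
      // as a direct [2 x i64] argument rather than a "byval" aggregate; each
      // element is then an i64 that takes this path and occupies its own GPR
      // (or its own doubleword of the parameter save area).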
3415 if (GPR_idx != Num_GPR_Regs) { 3416 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3417 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3418 3419 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3420 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3421 // value to MVT::i64 and then truncate to the correct register size. 3422 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3423 } else { 3424 if (CallConv == CallingConv::Fast) 3425 ComputeArgOffset(); 3426 3427 needsLoad = true; 3428 ArgSize = PtrByteSize; 3429 } 3430 if (CallConv != CallingConv::Fast || needsLoad) 3431 ArgOffset += 8; 3432 break; 3433 3434 case MVT::f32: 3435 case MVT::f64: 3436 // These can be scalar arguments or elements of a float array type 3437 // passed directly. The latter are used to implement ELFv2 homogenous 3438 // float aggregates. 3439 if (FPR_idx != Num_FPR_Regs) { 3440 unsigned VReg; 3441 3442 if (ObjectVT == MVT::f32) 3443 VReg = MF.addLiveIn(FPR[FPR_idx], 3444 Subtarget.hasP8Vector() 3445 ? &PPC::VSSRCRegClass 3446 : &PPC::F4RCRegClass); 3447 else 3448 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() 3449 ? &PPC::VSFRCRegClass 3450 : &PPC::F8RCRegClass); 3451 3452 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3453 ++FPR_idx; 3454 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { 3455 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 3456 // once we support fp <-> gpr moves. 3457 3458 // This can only ever happen in the presence of f32 array types, 3459 // since otherwise we never run out of FPRs before running out 3460 // of GPRs. 3461 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); 3462 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3463 3464 if (ObjectVT == MVT::f32) { 3465 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) 3466 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, 3467 DAG.getConstant(32, dl, MVT::i32)); 3468 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 3469 } 3470 3471 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); 3472 } else { 3473 if (CallConv == CallingConv::Fast) 3474 ComputeArgOffset(); 3475 3476 needsLoad = true; 3477 } 3478 3479 // When passing an array of floats, the array occupies consecutive 3480 // space in the argument area; only round up to the next doubleword 3481 // at the end of the array. Otherwise, each float takes 8 bytes. 3482 if (CallConv != CallingConv::Fast || needsLoad) { 3483 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; 3484 ArgOffset += ArgSize; 3485 if (Flags.isInConsecutiveRegsLast()) 3486 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3487 } 3488 break; 3489 case MVT::v4f32: 3490 case MVT::v4i32: 3491 case MVT::v8i16: 3492 case MVT::v16i8: 3493 case MVT::v2f64: 3494 case MVT::v2i64: 3495 case MVT::v1i128: 3496 if (!Subtarget.hasQPX()) { 3497 // These can be scalar arguments or elements of a vector array type 3498 // passed directly. The latter are used to implement ELFv2 homogenous 3499 // vector aggregates. 
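        // For illustration (a hypothetical homogeneous aggregate): with
        //   struct VPair { vector int a; vector int b; };
        //   void f(struct VPair p);
        // each member is passed directly, so a and b arrive in consecutive
        // vector registers while VRs remain, and otherwise take the memory
        // path below.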
3500 if (VR_idx != Num_VR_Regs) { 3501 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3502 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3503 ++VR_idx; 3504 } else { 3505 if (CallConv == CallingConv::Fast) 3506 ComputeArgOffset(); 3507 3508 needsLoad = true; 3509 } 3510 if (CallConv != CallingConv::Fast || needsLoad) 3511 ArgOffset += 16; 3512 break; 3513 } // not QPX 3514 3515 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && 3516 "Invalid QPX parameter type"); 3517 /* fall through */ 3518 3519 case MVT::v4f64: 3520 case MVT::v4i1: 3521 // QPX vectors are treated like their scalar floating-point subregisters 3522 // (except that they're larger). 3523 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32; 3524 if (QFPR_idx != Num_QFPR_Regs) { 3525 const TargetRegisterClass *RC; 3526 switch (ObjectVT.getSimpleVT().SimpleTy) { 3527 case MVT::v4f64: RC = &PPC::QFRCRegClass; break; 3528 case MVT::v4f32: RC = &PPC::QSRCRegClass; break; 3529 default: RC = &PPC::QBRCRegClass; break; 3530 } 3531 3532 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); 3533 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3534 ++QFPR_idx; 3535 } else { 3536 if (CallConv == CallingConv::Fast) 3537 ComputeArgOffset(); 3538 needsLoad = true; 3539 } 3540 if (CallConv != CallingConv::Fast || needsLoad) 3541 ArgOffset += Sz; 3542 break; 3543 } 3544 3545 // We need to load the argument to a virtual register if we determined 3546 // above that we ran out of physical registers of the appropriate type. 3547 if (needsLoad) { 3548 if (ObjSize < ArgSize && !isLittleEndian) 3549 CurArgOffset += ArgSize - ObjSize; 3550 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable); 3551 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3552 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); 3553 } 3554 3555 InVals.push_back(ArgVal); 3556 } 3557 3558 // Area that is at least reserved in the caller of this function. 3559 unsigned MinReservedArea; 3560 if (HasParameterArea) 3561 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); 3562 else 3563 MinReservedArea = LinkageSize; 3564 3565 // Set the size that is at least reserved in caller of this function. Tail 3566 // call optimized functions' reserved stack space needs to be aligned so that 3567 // taking the difference between two stack areas will result in an aligned 3568 // stack. 3569 MinReservedArea = 3570 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3571 FuncInfo->setMinReservedArea(MinReservedArea); 3572 3573 // If the function takes variable number of arguments, make a frame index for 3574 // the start of the first vararg value... for expansion of llvm.va_start. 3575 if (isVarArg) { 3576 int Depth = ArgOffset; 3577 3578 FuncInfo->setVarArgsFrameIndex( 3579 MFI.CreateFixedObject(PtrByteSize, Depth, true)); 3580 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3581 3582 // If this function is vararg, store any remaining integer argument regs 3583 // to their spots on the stack so that they may be loaded by dereferencing 3584 // the result of va_next. 
3585 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 3586 GPR_idx < Num_GPR_Regs; ++GPR_idx) { 3587 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3588 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3589 SDValue Store = 3590 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); 3591 MemOps.push_back(Store); 3592 // Increment the address by four for the next argument to store 3593 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); 3594 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 3595 } 3596 } 3597 3598 if (!MemOps.empty()) 3599 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 3600 3601 return Chain; 3602 } 3603 3604 SDValue PPCTargetLowering::LowerFormalArguments_Darwin( 3605 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 3606 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 3607 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 3608 // TODO: add description of PPC stack frame format, or at least some docs. 3609 // 3610 MachineFunction &MF = DAG.getMachineFunction(); 3611 MachineFrameInfo &MFI = MF.getFrameInfo(); 3612 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3613 3614 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3615 bool isPPC64 = PtrVT == MVT::i64; 3616 // Potential tail calls could cause overwriting of argument stack slots. 3617 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 3618 (CallConv == CallingConv::Fast)); 3619 unsigned PtrByteSize = isPPC64 ? 8 : 4; 3620 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 3621 unsigned ArgOffset = LinkageSize; 3622 // Area that is at least reserved in caller of this function. 3623 unsigned MinReservedArea = ArgOffset; 3624 3625 static const MCPhysReg GPR_32[] = { // 32-bit registers. 3626 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3627 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3628 }; 3629 static const MCPhysReg GPR_64[] = { // 64-bit registers. 3630 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3631 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3632 }; 3633 static const MCPhysReg VR[] = { 3634 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3635 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3636 }; 3637 3638 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 3639 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; 3640 const unsigned Num_VR_Regs = array_lengthof( VR); 3641 3642 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3643 3644 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 3645 3646 // In 32-bit non-varargs functions, the stack space for vectors is after the 3647 // stack space for non-vectors. We do not use this space unless we have 3648 // too many vectors to fit in registers, something that only occurs in 3649 // constructed examples:), but we have to walk the arglist to figure 3650 // that out...for the pathological case, compute VecArgOffset as the 3651 // start of the vector parameter area. Computing VecArgOffset is the 3652 // entire point of the following loop. 3653 unsigned VecArgOffset = ArgOffset; 3654 if (!isVarArg && !isPPC64) { 3655 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 3656 ++ArgNo) { 3657 EVT ObjectVT = Ins[ArgNo].VT; 3658 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3659 3660 if (Flags.isByVal()) { 3661 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 
3662 unsigned ObjSize = Flags.getByValSize(); 3663 unsigned ArgSize = 3664 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3665 VecArgOffset += ArgSize; 3666 continue; 3667 } 3668 3669 switch(ObjectVT.getSimpleVT().SimpleTy) { 3670 default: llvm_unreachable("Unhandled argument type!"); 3671 case MVT::i1: 3672 case MVT::i32: 3673 case MVT::f32: 3674 VecArgOffset += 4; 3675 break; 3676 case MVT::i64: // PPC64 3677 case MVT::f64: 3678 // FIXME: We are guaranteed to be !isPPC64 at this point. 3679 // Does MVT::i64 apply? 3680 VecArgOffset += 8; 3681 break; 3682 case MVT::v4f32: 3683 case MVT::v4i32: 3684 case MVT::v8i16: 3685 case MVT::v16i8: 3686 // Nothing to do, we're only looking at Nonvector args here. 3687 break; 3688 } 3689 } 3690 } 3691 // We've found where the vector parameter area in memory is. Skip the 3692 // first 12 parameters; these don't use that memory. 3693 VecArgOffset = ((VecArgOffset+15)/16)*16; 3694 VecArgOffset += 12*16; 3695 3696 // Add DAG nodes to load the arguments or copy them out of registers. On 3697 // entry to a function on PPC, the arguments start after the linkage area, 3698 // although the first ones are often in registers. 3699 3700 SmallVector<SDValue, 8> MemOps; 3701 unsigned nAltivecParamsAtEnd = 0; 3702 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3703 unsigned CurArgIdx = 0; 3704 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3705 SDValue ArgVal; 3706 bool needsLoad = false; 3707 EVT ObjectVT = Ins[ArgNo].VT; 3708 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 3709 unsigned ArgSize = ObjSize; 3710 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3711 if (Ins[ArgNo].isOrigArg()) { 3712 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3713 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3714 } 3715 unsigned CurArgOffset = ArgOffset; 3716 3717 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 3718 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 3719 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 3720 if (isVarArg || isPPC64) { 3721 MinReservedArea = ((MinReservedArea+15)/16)*16; 3722 MinReservedArea += CalculateStackSlotSize(ObjectVT, 3723 Flags, 3724 PtrByteSize); 3725 } else nAltivecParamsAtEnd++; 3726 } else 3727 // Calculate min reserved area. 3728 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 3729 Flags, 3730 PtrByteSize); 3731 3732 // FIXME the codegen can be much improved in some cases. 3733 // We do not have to keep everything in memory. 3734 if (Flags.isByVal()) { 3735 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3736 3737 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3738 ObjSize = Flags.getByValSize(); 3739 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3740 // Objects of size 1 and 2 are right justified, everything else is 3741 // left justified. This means the memory address is adjusted forwards. 3742 if (ObjSize==1 || ObjSize==2) { 3743 CurArgOffset = CurArgOffset + (4 - ObjSize); 3744 } 3745 // The value of the object is its address. 
3746 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true); 3747 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3748 InVals.push_back(FIN); 3749 if (ObjSize==1 || ObjSize==2) { 3750 if (GPR_idx != Num_GPR_Regs) { 3751 unsigned VReg; 3752 if (isPPC64) 3753 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3754 else 3755 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3756 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3757 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 3758 SDValue Store = 3759 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 3760 MachinePointerInfo(&*FuncArg), ObjType); 3761 MemOps.push_back(Store); 3762 ++GPR_idx; 3763 } 3764 3765 ArgOffset += PtrByteSize; 3766 3767 continue; 3768 } 3769 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3770 // Store whatever pieces of the object are in registers 3771 // to memory. ArgOffset will be the address of the beginning 3772 // of the object. 3773 if (GPR_idx != Num_GPR_Regs) { 3774 unsigned VReg; 3775 if (isPPC64) 3776 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3777 else 3778 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3779 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); 3780 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3781 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3782 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3783 MachinePointerInfo(&*FuncArg, j)); 3784 MemOps.push_back(Store); 3785 ++GPR_idx; 3786 ArgOffset += PtrByteSize; 3787 } else { 3788 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 3789 break; 3790 } 3791 } 3792 continue; 3793 } 3794 3795 switch (ObjectVT.getSimpleVT().SimpleTy) { 3796 default: llvm_unreachable("Unhandled argument type!"); 3797 case MVT::i1: 3798 case MVT::i32: 3799 if (!isPPC64) { 3800 if (GPR_idx != Num_GPR_Regs) { 3801 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3802 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 3803 3804 if (ObjectVT == MVT::i1) 3805 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 3806 3807 ++GPR_idx; 3808 } else { 3809 needsLoad = true; 3810 ArgSize = PtrByteSize; 3811 } 3812 // All int arguments reserve stack space in the Darwin ABI. 3813 ArgOffset += PtrByteSize; 3814 break; 3815 } 3816 LLVM_FALLTHROUGH; 3817 case MVT::i64: // PPC64 3818 if (GPR_idx != Num_GPR_Regs) { 3819 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3820 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3821 3822 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3823 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3824 // value to MVT::i64 and then truncate to the correct register size. 3825 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3826 3827 ++GPR_idx; 3828 } else { 3829 needsLoad = true; 3830 ArgSize = PtrByteSize; 3831 } 3832 // All int arguments reserve stack space in the Darwin ABI. 3833 ArgOffset += 8; 3834 break; 3835 3836 case MVT::f32: 3837 case MVT::f64: 3838 // Every 4 bytes of argument space consumes one of the GPRs available for 3839 // argument passing. 
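      // For example, on 32-bit Darwin a double argument occupies 8 bytes of
      // the parameter area and therefore shadows two GPRs, even though the
      // value itself arrives in an FPR; the GPR_idx bookkeeping below accounts
      // for exactly that.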
3840 if (GPR_idx != Num_GPR_Regs) { 3841 ++GPR_idx; 3842 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 3843 ++GPR_idx; 3844 } 3845 if (FPR_idx != Num_FPR_Regs) { 3846 unsigned VReg; 3847 3848 if (ObjectVT == MVT::f32) 3849 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 3850 else 3851 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 3852 3853 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3854 ++FPR_idx; 3855 } else { 3856 needsLoad = true; 3857 } 3858 3859 // All FP arguments reserve stack space in the Darwin ABI. 3860 ArgOffset += isPPC64 ? 8 : ObjSize; 3861 break; 3862 case MVT::v4f32: 3863 case MVT::v4i32: 3864 case MVT::v8i16: 3865 case MVT::v16i8: 3866 // Note that vector arguments in registers don't reserve stack space, 3867 // except in varargs functions. 3868 if (VR_idx != Num_VR_Regs) { 3869 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 3870 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 3871 if (isVarArg) { 3872 while ((ArgOffset % 16) != 0) { 3873 ArgOffset += PtrByteSize; 3874 if (GPR_idx != Num_GPR_Regs) 3875 GPR_idx++; 3876 } 3877 ArgOffset += 16; 3878 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 3879 } 3880 ++VR_idx; 3881 } else { 3882 if (!isVarArg && !isPPC64) { 3883 // Vectors go after all the nonvectors. 3884 CurArgOffset = VecArgOffset; 3885 VecArgOffset += 16; 3886 } else { 3887 // Vectors are aligned. 3888 ArgOffset = ((ArgOffset+15)/16)*16; 3889 CurArgOffset = ArgOffset; 3890 ArgOffset += 16; 3891 } 3892 needsLoad = true; 3893 } 3894 break; 3895 } 3896 3897 // We need to load the argument to a virtual register if we determined above 3898 // that we ran out of physical registers of the appropriate type. 3899 if (needsLoad) { 3900 int FI = MFI.CreateFixedObject(ObjSize, 3901 CurArgOffset + (ArgSize - ObjSize), 3902 isImmutable); 3903 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3904 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); 3905 } 3906 3907 InVals.push_back(ArgVal); 3908 } 3909 3910 // Allow for Altivec parameters at the end, if needed. 3911 if (nAltivecParamsAtEnd) { 3912 MinReservedArea = ((MinReservedArea+15)/16)*16; 3913 MinReservedArea += 16*nAltivecParamsAtEnd; 3914 } 3915 3916 // Area that is at least reserved in the caller of this function. 3917 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); 3918 3919 // Set the size that is at least reserved in caller of this function. Tail 3920 // call optimized functions' reserved stack space needs to be aligned so that 3921 // taking the difference between two stack areas will result in an aligned 3922 // stack. 3923 MinReservedArea = 3924 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); 3925 FuncInfo->setMinReservedArea(MinReservedArea); 3926 3927 // If the function takes variable number of arguments, make a frame index for 3928 // the start of the first vararg value... for expansion of llvm.va_start. 3929 if (isVarArg) { 3930 int Depth = ArgOffset; 3931 3932 FuncInfo->setVarArgsFrameIndex( 3933 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, 3934 Depth, true)); 3935 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3936 3937 // If this function is vararg, store any remaining integer argument regs 3938 // to their spots on the stack so that they may be loaded by dereferencing 3939 // the result of va_next. 
3940     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3941       unsigned VReg;
3942
3943       if (isPPC64)
3944         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3945       else
3946         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3947
3948       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3949       SDValue Store =
3950           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3951       MemOps.push_back(Store);
3952       // Increment the address by four for the next argument to store
3953       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3954       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3955     }
3956   }
3957
3958   if (!MemOps.empty())
3959     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3960
3961   return Chain;
3962 }
3963
3964 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
3965 /// adjusted to accommodate the arguments for the tailcall.
3966 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
3967                                    unsigned ParamSize) {
3968
3969   if (!isTailCall) return 0;
3970
3971   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3972   unsigned CallerMinReservedArea = FI->getMinReservedArea();
3973   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
3974   // Remember only if the new adjustment is bigger.
3975   if (SPDiff < FI->getTailCallSPDelta())
3976     FI->setTailCallSPDelta(SPDiff);
3977
3978   return SPDiff;
3979 }
3980
3981 static bool isFunctionGlobalAddress(SDValue Callee);
3982
3983 static bool
3984 resideInSameSection(const Function *Caller, SDValue Callee,
3985                     const TargetMachine &TM) {
3986   // If !G, Callee can be an external symbol.
3987   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3988   if (!G)
3989     return false;
3990
3991   const GlobalValue *GV = G->getGlobal();
3992   if (!GV->isStrongDefinitionForLinker())
3993     return false;
3994
3995   // Any explicitly-specified sections and section prefixes must also match.
3996   // Also, if we're using -ffunction-sections, then each function is always in
3997   // a different section (the same is true for COMDAT functions).
3998   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
3999       GV->getSection() != Caller->getSection())
4000     return false;
4001   if (const auto *F = dyn_cast<Function>(GV)) {
4002     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4003       return false;
4004   }
4005
4006   // If the callee might be interposed, then we can't assume the ultimate call
4007   // target will be in the same section. Even in cases where we can assume that
4008   // interposition won't happen, in any case where the linker might insert a
4009   // stub to allow for interposition, we must generate code as though
4010   // interposition might occur. To understand why this matters, consider a
4011   // situation where: a -> b -> c where the arrows indicate calls. b and c are
4012   // in the same section, but a is in a different module (i.e. has a different
4013   // TOC base pointer). If the linker allows for interposition between b and c,
4014   // then it will generate a stub for the call edge between b and c which will
4015   // save the TOC pointer into the designated stack slot allocated by b. If we
4016   // return true here, and therefore allow a tail call between b and c, that
4017   // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4018   // pointer into the stack slot allocated by a (where the a -> b stub saved
4019   // a's TOC base pointer).
If we're not considering a tail call, but rather, 4020 // whether a nop is needed after the call instruction in b, because the linker 4021 // will insert a stub, it might complain about a missing nop if we omit it 4022 // (although many don't complain in this case). 4023 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV)) 4024 return false; 4025 4026 return true; 4027 } 4028 4029 static bool 4030 needStackSlotPassParameters(const PPCSubtarget &Subtarget, 4031 const SmallVectorImpl<ISD::OutputArg> &Outs) { 4032 assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); 4033 4034 const unsigned PtrByteSize = 8; 4035 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 4036 4037 static const MCPhysReg GPR[] = { 4038 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4039 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4040 }; 4041 static const MCPhysReg VR[] = { 4042 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4043 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4044 }; 4045 4046 const unsigned NumGPRs = array_lengthof(GPR); 4047 const unsigned NumFPRs = 13; 4048 const unsigned NumVRs = array_lengthof(VR); 4049 const unsigned ParamAreaSize = NumGPRs * PtrByteSize; 4050 4051 unsigned NumBytes = LinkageSize; 4052 unsigned AvailableFPRs = NumFPRs; 4053 unsigned AvailableVRs = NumVRs; 4054 4055 for (const ISD::OutputArg& Param : Outs) { 4056 if (Param.Flags.isNest()) continue; 4057 4058 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, 4059 PtrByteSize, LinkageSize, ParamAreaSize, 4060 NumBytes, AvailableFPRs, AvailableVRs, 4061 Subtarget.hasQPX())) 4062 return true; 4063 } 4064 return false; 4065 } 4066 4067 static bool 4068 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) { 4069 if (CS->arg_size() != CallerFn->getArgumentList().size()) 4070 return false; 4071 4072 ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin(); 4073 ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end(); 4074 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); 4075 4076 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) { 4077 const Value* CalleeArg = *CalleeArgIter; 4078 const Value* CallerArg = &(*CallerArgIter); 4079 if (CalleeArg == CallerArg) 4080 continue; 4081 4082 // e.g. @caller([4 x i64] %a, [4 x i64] %b) { 4083 // tail call @callee([4 x i64] undef, [4 x i64] %b) 4084 // } 4085 // 1st argument of callee is undef and has the same type as caller. 4086 if (CalleeArg->getType() == CallerArg->getType() && 4087 isa<UndefValue>(CalleeArg)) 4088 continue; 4089 4090 return false; 4091 } 4092 4093 return true; 4094 } 4095 4096 bool 4097 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( 4098 SDValue Callee, 4099 CallingConv::ID CalleeCC, 4100 ImmutableCallSite *CS, 4101 bool isVarArg, 4102 const SmallVectorImpl<ISD::OutputArg> &Outs, 4103 const SmallVectorImpl<ISD::InputArg> &Ins, 4104 SelectionDAG& DAG) const { 4105 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; 4106 4107 if (DisableSCO && !TailCallOpt) return false; 4108 4109 // Variadic argument functions are not supported. 
4110   if (isVarArg) return false;
4111
4112   MachineFunction &MF = DAG.getMachineFunction();
4113   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4114
4115   // Tail or Sibling call optimization (TCO/SCO) requires that the callee and
4116   // the caller use the same calling convention.
4117   if (CallerCC != CalleeCC) return false;
4118
4119   // TCO/SCO is only supported for the Fast and C calling conventions.
4120   if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
4121     return false;
4122
4123   // Callers with byval parameters are not supported.
4124   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4125     return false;
4126
4127   // Callees with byval parameters are not supported either.
4128   // Note: This is a quick workaround, because in some cases, e.g. when the
4129   // caller's stack size > the callee's stack size, we are still able to apply
4130   // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
4131   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4132     return false;
4133
4134   // No TCO/SCO on indirect calls, because the caller has to restore its TOC.
4135   if (!isFunctionGlobalAddress(Callee) &&
4136       !isa<ExternalSymbolSDNode>(Callee))
4137     return false;
4138
4139   // Check if the callee resides in the same section, because for now the
4140   // PPC64 SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that
4141   // resides in another section.
4142   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4143   if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
4144     return false;
4145
4146   // TCO allows altering the callee's ABI, so we don't have to check further.
4147   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4148     return true;
4149
4150   if (DisableSCO) return false;
4151
4152   // If the callee uses the same argument list as the caller, we can apply
4153   // SCO. Otherwise, we need to check whether the callee needs stack slots
4154   // for passing arguments.
4155   if (!hasSameArgumentList(MF.getFunction(), CS) &&
4156       needStackSlotPassParameters(Subtarget, Outs)) {
4157     return false;
4158   }
4159
4160   return true;
4161 }
4162
4163 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4164 /// for tail call optimization. Targets which want to do tail call
4165 /// optimization should implement this function.
4166 bool
4167 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4168                                                      CallingConv::ID CalleeCC,
4169                                                      bool isVarArg,
4170                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4171                                                      SelectionDAG& DAG) const {
4172   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4173     return false;
4174
4175   // Variable argument functions are not supported.
4176   if (isVarArg)
4177     return false;
4178
4179   MachineFunction &MF = DAG.getMachineFunction();
4180   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4181   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4182     // Functions containing byval parameters are not supported.
4183     for (unsigned i = 0; i != Ins.size(); i++) {
4184       ISD::ArgFlagsTy Flags = Ins[i].Flags;
4185       if (Flags.isByVal()) return false;
4186     }
4187
4188     // Non-PIC/GOT tail calls are supported.
4189     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4190       return true;
4191
4192     // At the moment we can only do local tail calls (in same module, hidden
4193     // or protected) if we are generating PIC.
4194 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4195 return G->getGlobal()->hasHiddenVisibility() 4196 || G->getGlobal()->hasProtectedVisibility(); 4197 } 4198 4199 return false; 4200 } 4201 4202 /// isCallCompatibleAddress - Return the immediate to use if the specified 4203 /// 32-bit value is representable in the immediate field of a BxA instruction. 4204 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 4205 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 4206 if (!C) return nullptr; 4207 4208 int Addr = C->getZExtValue(); 4209 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 4210 SignExtend32<26>(Addr) != Addr) 4211 return nullptr; // Top 6 bits have to be sext of immediate. 4212 4213 return DAG 4214 .getConstant( 4215 (int)C->getZExtValue() >> 2, SDLoc(Op), 4216 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) 4217 .getNode(); 4218 } 4219 4220 namespace { 4221 4222 struct TailCallArgumentInfo { 4223 SDValue Arg; 4224 SDValue FrameIdxOp; 4225 int FrameIdx; 4226 4227 TailCallArgumentInfo() : FrameIdx(0) {} 4228 }; 4229 } 4230 4231 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 4232 static void StoreTailCallArgumentsToStackSlot( 4233 SelectionDAG &DAG, SDValue Chain, 4234 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 4235 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) { 4236 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 4237 SDValue Arg = TailCallArgs[i].Arg; 4238 SDValue FIN = TailCallArgs[i].FrameIdxOp; 4239 int FI = TailCallArgs[i].FrameIdx; 4240 // Store relative to framepointer. 4241 MemOpChains.push_back(DAG.getStore( 4242 Chain, dl, Arg, FIN, 4243 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); 4244 } 4245 } 4246 4247 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 4248 /// the appropriate stack slot for the tail call optimized function call. 4249 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, 4250 SDValue OldRetAddr, SDValue OldFP, 4251 int SPDiff, const SDLoc &dl) { 4252 if (SPDiff) { 4253 // Calculate the new stack slot for the return address. 4254 MachineFunction &MF = DAG.getMachineFunction(); 4255 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); 4256 const PPCFrameLowering *FL = Subtarget.getFrameLowering(); 4257 bool isPPC64 = Subtarget.isPPC64(); 4258 int SlotSize = isPPC64 ? 8 : 4; 4259 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); 4260 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize, 4261 NewRetAddrLoc, true); 4262 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 4263 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 4264 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 4265 MachinePointerInfo::getFixedStack(MF, NewRetAddr)); 4266 4267 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 4268 // slot as the FP is never overwritten. 4269 if (Subtarget.isDarwinABI()) { 4270 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); 4271 int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc, 4272 true); 4273 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 4274 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 4275 MachinePointerInfo::getFixedStack( 4276 DAG.getMachineFunction(), NewFPIdx)); 4277 } 4278 } 4279 return Chain; 4280 } 4281 4282 /// CalculateTailCallArgDest - Remember Argument for later processing. 
Calculate 4283 /// the position of the argument. 4284 static void 4285 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 4286 SDValue Arg, int SPDiff, unsigned ArgOffset, 4287 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 4288 int Offset = ArgOffset + SPDiff; 4289 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8; 4290 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); 4291 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 4292 SDValue FIN = DAG.getFrameIndex(FI, VT); 4293 TailCallArgumentInfo Info; 4294 Info.Arg = Arg; 4295 Info.FrameIdxOp = FIN; 4296 Info.FrameIdx = FI; 4297 TailCallArguments.push_back(Info); 4298 } 4299 4300 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 4301 /// stack slot. Returns the chain as result and the loaded frame pointers in 4302 /// LROpOut/FPOpout. Used when tail calling. 4303 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( 4304 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, 4305 SDValue &FPOpOut, const SDLoc &dl) const { 4306 if (SPDiff) { 4307 // Load the LR and FP stack slot for later adjusting. 4308 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 4309 LROpOut = getReturnAddrFrameIndex(DAG); 4310 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); 4311 Chain = SDValue(LROpOut.getNode(), 1); 4312 4313 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 4314 // slot as the FP is never overwritten. 4315 if (Subtarget.isDarwinABI()) { 4316 FPOpOut = getFramePointerFrameIndex(DAG); 4317 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); 4318 Chain = SDValue(FPOpOut.getNode(), 1); 4319 } 4320 } 4321 return Chain; 4322 } 4323 4324 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 4325 /// by "Src" to address "Dst" of size "Size". Alignment information is 4326 /// specified by the specific parameter attribute. The copy will be passed as 4327 /// a byval function parameter. 4328 /// Sometimes what we are copying is the end of a larger object, the part that 4329 /// does not fit in registers. 4330 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, 4331 SDValue Chain, ISD::ArgFlagsTy Flags, 4332 SelectionDAG &DAG, const SDLoc &dl) { 4333 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); 4334 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 4335 false, false, false, MachinePointerInfo(), 4336 MachinePointerInfo()); 4337 } 4338 4339 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 4340 /// tail calls. 4341 static void LowerMemOpCallTo( 4342 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, 4343 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, 4344 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 4345 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) { 4346 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4347 if (!isTailCall) { 4348 if (isVector) { 4349 SDValue StackPtr; 4350 if (isPPC64) 4351 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4352 else 4353 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4354 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4355 DAG.getConstant(ArgOffset, dl, PtrVT)); 4356 } 4357 MemOpChains.push_back( 4358 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 4359 // Calculate and remember argument location. 
4360 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 4361 TailCallArguments); 4362 } 4363 4364 static void 4365 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 4366 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, 4367 SDValue FPOp, 4368 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 4369 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 4370 // might overwrite each other in case of tail call optimization. 4371 SmallVector<SDValue, 8> MemOpChains2; 4372 // Do not flag preceding copytoreg stuff together with the following stuff. 4373 InFlag = SDValue(); 4374 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 4375 MemOpChains2, dl); 4376 if (!MemOpChains2.empty()) 4377 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 4378 4379 // Store the return address to the appropriate stack slot. 4380 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); 4381 4382 // Emit callseq_end just before tailcall node. 4383 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4384 DAG.getIntPtrConstant(0, dl, true), InFlag, dl); 4385 InFlag = Chain.getValue(1); 4386 } 4387 4388 // Is this global address that of a function that can be called by name? (as 4389 // opposed to something that must hold a descriptor for an indirect call). 4390 static bool isFunctionGlobalAddress(SDValue Callee) { 4391 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 4392 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 4393 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 4394 return false; 4395 4396 return G->getGlobal()->getValueType()->isFunctionTy(); 4397 } 4398 4399 return false; 4400 } 4401 4402 static unsigned 4403 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, 4404 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, 4405 bool isPatchPoint, bool hasNest, 4406 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, 4407 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 4408 ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { 4409 4410 bool isPPC64 = Subtarget.isPPC64(); 4411 bool isSVR4ABI = Subtarget.isSVR4ABI(); 4412 bool isELFv2ABI = Subtarget.isELFv2ABI(); 4413 4414 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 4415 NodeTys.push_back(MVT::Other); // Returns a chain 4416 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 4417 4418 unsigned CallOpc = PPCISD::CALL; 4419 4420 bool needIndirectCall = true; 4421 if (!isSVR4ABI || !isPPC64) 4422 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 4423 // If this is an absolute destination address, use the munged value. 4424 Callee = SDValue(Dest, 0); 4425 needIndirectCall = false; 4426 } 4427 4428 // PC-relative references to external symbols should go through $stub, unless 4429 // we're building with the leopard linker or later, which automatically 4430 // synthesizes these stubs. 
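// (On modern ELF targets the role of those stubs is played by the PLT: the
// check just below marks a callee that cannot be assumed DSO-local with
// PPCII::MO_PLT when targeting 32-bit ELF, so the call is resolved through
// its PLT entry, e.g. emitted as "bl foo@plt". Illustrative note only; the
// "foo" name is hypothetical.)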
4431 const TargetMachine &TM = DAG.getTarget(); 4432 const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); 4433 const GlobalValue *GV = nullptr; 4434 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) 4435 GV = G->getGlobal(); 4436 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); 4437 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; 4438 4439 if (isFunctionGlobalAddress(Callee)) { 4440 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 4441 // A call to a TLS address is actually an indirect call to a 4442 // thread-specific pointer. 4443 unsigned OpFlags = 0; 4444 if (UsePlt) 4445 OpFlags = PPCII::MO_PLT; 4446 4447 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 4448 // every direct call is) turn it into a TargetGlobalAddress / 4449 // TargetExternalSymbol node so that legalize doesn't hack it. 4450 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 4451 Callee.getValueType(), 0, OpFlags); 4452 needIndirectCall = false; 4453 } 4454 4455 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 4456 unsigned char OpFlags = 0; 4457 4458 if (UsePlt) 4459 OpFlags = PPCII::MO_PLT; 4460 4461 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 4462 OpFlags); 4463 needIndirectCall = false; 4464 } 4465 4466 if (isPatchPoint) { 4467 // We'll form an invalid direct call when lowering a patchpoint; the full 4468 // sequence for an indirect call is complicated, and many of the 4469 // instructions introduced might have side effects (and, thus, can't be 4470 // removed later). The call itself will be removed as soon as the 4471 // argument/return lowering is complete, so the fact that it has the wrong 4472 // kind of operands should not really matter. 4473 needIndirectCall = false; 4474 } 4475 4476 if (needIndirectCall) { 4477 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 4478 // to do the call, we can't use PPCISD::CALL. 4479 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 4480 4481 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 4482 // Function pointers in the 64-bit SVR4 ABI do not point to the function 4483 // entry point, but to the function descriptor (the function entry point 4484 // address is part of the function descriptor though). 4485 // The function descriptor is a three doubleword structure with the 4486 // following fields: function entry point, TOC base address and 4487 // environment pointer. 4488 // Thus for a call through a function pointer, the following actions need 4489 // to be performed: 4490 // 1. Save the TOC of the caller in the TOC save area of its stack 4491 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 4492 // 2. Load the address of the function entry point from the function 4493 // descriptor. 4494 // 3. Load the TOC of the callee from the function descriptor into r2. 4495 // 4. Load the environment pointer from the function descriptor into 4496 // r11. 4497 // 5. Branch to the function entry point address. 4498 // 6. On return of the callee, the TOC of the caller needs to be 4499 // restored (this is done in FinishCall()). 4500 // 4501 // The loads are scheduled at the beginning of the call sequence, and the 4502 // register copies are flagged together to ensure that no other 4503 // operations can be scheduled in between. E.g. 
without flagging the 4504 // copies together, a TOC access in the caller could be scheduled between 4505 // the assignment of the callee TOC and the branch to the callee, which 4506 // results in the TOC access going through the TOC of the callee instead 4507 // of going through the TOC of the caller, which leads to incorrect code. 4508 4509 // Load the address of the function entry point from the function 4510 // descriptor. 4511 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); 4512 if (LDChain.getValueType() == MVT::Glue) 4513 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); 4514 4515 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() 4516 ? (MachineMemOperand::MODereferenceable | 4517 MachineMemOperand::MOInvariant) 4518 : MachineMemOperand::MONone; 4519 4520 MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr); 4521 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, 4522 /* Alignment = */ 8, MMOFlags); 4523 4524 // Load environment pointer into r11. 4525 SDValue PtrOff = DAG.getIntPtrConstant(16, dl); 4526 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 4527 SDValue LoadEnvPtr = 4528 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), 4529 /* Alignment = */ 8, MMOFlags); 4530 4531 SDValue TOCOff = DAG.getIntPtrConstant(8, dl); 4532 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 4533 SDValue TOCPtr = 4534 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), 4535 /* Alignment = */ 8, MMOFlags); 4536 4537 setUsesTOCBasePtr(DAG); 4538 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, 4539 InFlag); 4540 Chain = TOCVal.getValue(0); 4541 InFlag = TOCVal.getValue(1); 4542 4543 // If the function call has an explicit 'nest' parameter, it takes the 4544 // place of the environment pointer. 4545 if (!hasNest) { 4546 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 4547 InFlag); 4548 4549 Chain = EnvVal.getValue(0); 4550 InFlag = EnvVal.getValue(1); 4551 } 4552 4553 MTCTROps[0] = Chain; 4554 MTCTROps[1] = LoadFuncPtr; 4555 MTCTROps[2] = InFlag; 4556 } 4557 4558 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 4559 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 4560 InFlag = Chain.getValue(1); 4561 4562 NodeTys.clear(); 4563 NodeTys.push_back(MVT::Other); 4564 NodeTys.push_back(MVT::Glue); 4565 Ops.push_back(Chain); 4566 CallOpc = PPCISD::BCTRL; 4567 Callee.setNode(nullptr); 4568 // Add use of X11 (holding environment pointer) 4569 if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) 4570 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 4571 // Add CTR register as callee so a bctr can be emitted later. 4572 if (isTailCall) 4573 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 4574 } 4575 4576 // If this is a direct call, pass the chain and the callee. 4577 if (Callee.getNode()) { 4578 Ops.push_back(Chain); 4579 Ops.push_back(Callee); 4580 } 4581 // If this is a tail call add stack pointer delta. 4582 if (isTailCall) 4583 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); 4584 4585 // Add argument registers to the end of the list so that they are known live 4586 // into the call. 4587 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 4588 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 4589 RegsToPass[i].second.getValueType())); 4590 4591 // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live 4592 // into the call. 
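// (Illustrative recap of the indirect-call sequence assembled above, not
// normative ABI text: under ELFv1 the callee value points at a function
// descriptor laid out roughly as
//   struct FuncDesc { void *EntryPoint; void *TOCBase; void *EnvPtr; };
// i.e. the offsets 0, 8 and 16 loaded above, and the resulting code is
// approximately
//   ld    rT, 0(fd)    ; entry point
//   ld    r2, 8(fd)    ; callee TOC
//   ld    r11, 16(fd)  ; environment pointer (skipped for 'nest' calls)
//   mtctr rT
//   bctrl
// with rT standing in for whichever GPR ends up holding the entry point.)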
4593 if (isSVR4ABI && isPPC64 && !isPatchPoint) { 4594 setUsesTOCBasePtr(DAG); 4595 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 4596 } 4597 4598 return CallOpc; 4599 } 4600 4601 SDValue PPCTargetLowering::LowerCallResult( 4602 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, 4603 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 4604 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 4605 4606 SmallVector<CCValAssign, 16> RVLocs; 4607 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 4608 *DAG.getContext()); 4609 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 4610 4611 // Copy all of the result registers out of their specified physreg. 4612 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 4613 CCValAssign &VA = RVLocs[i]; 4614 assert(VA.isRegLoc() && "Can only return in registers!"); 4615 4616 SDValue Val = DAG.getCopyFromReg(Chain, dl, 4617 VA.getLocReg(), VA.getLocVT(), InFlag); 4618 Chain = Val.getValue(1); 4619 InFlag = Val.getValue(2); 4620 4621 switch (VA.getLocInfo()) { 4622 default: llvm_unreachable("Unknown loc info!"); 4623 case CCValAssign::Full: break; 4624 case CCValAssign::AExt: 4625 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4626 break; 4627 case CCValAssign::ZExt: 4628 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 4629 DAG.getValueType(VA.getValVT())); 4630 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4631 break; 4632 case CCValAssign::SExt: 4633 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 4634 DAG.getValueType(VA.getValVT())); 4635 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 4636 break; 4637 } 4638 4639 InVals.push_back(Val); 4640 } 4641 4642 return Chain; 4643 } 4644 4645 SDValue PPCTargetLowering::FinishCall( 4646 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, 4647 bool isPatchPoint, bool hasNest, SelectionDAG &DAG, 4648 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, 4649 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, 4650 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, 4651 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { 4652 4653 std::vector<EVT> NodeTys; 4654 SmallVector<SDValue, 8> Ops; 4655 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, 4656 SPDiff, isTailCall, isPatchPoint, hasNest, 4657 RegsToPass, Ops, NodeTys, CS, Subtarget); 4658 4659 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 4660 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 4661 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 4662 4663 // When performing tail call optimization the callee pops its arguments off 4664 // the stack. Account for this here so these bytes can be pushed back on in 4665 // PPCFrameLowering::eliminateCallFramePseudoInstr. 4666 int BytesCalleePops = 4667 (CallConv == CallingConv::Fast && 4668 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 4669 4670 // Add a register mask operand representing the call-preserved registers. 4671 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4672 const uint32_t *Mask = 4673 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); 4674 assert(Mask && "Missing call preserved mask for calling convention"); 4675 Ops.push_back(DAG.getRegisterMask(Mask)); 4676 4677 if (InFlag.getNode()) 4678 Ops.push_back(InFlag); 4679 4680 // Emit tail call. 
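// (At this point Ops holds, roughly in order: the chain, the callee -- or,
// for an indirect call, the X11/CTR uses added above -- the SPDiff constant
// for tail calls, the argument registers, X2 where the TOC must be kept
// live, CR1EQ for 32-bit SVR4 vararg calls, the register mask and, if
// present, the glue value. The TC_RETURN or call node created next consumes
// exactly this operand list.)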
4681 if (isTailCall) {
4682 assert(((Callee.getOpcode() == ISD::Register &&
4683 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4684 Callee.getOpcode() == ISD::TargetExternalSymbol ||
4685 Callee.getOpcode() == ISD::TargetGlobalAddress ||
4686 isa<ConstantSDNode>(Callee)) &&
4687 "Expecting a global address, external symbol, absolute value or register");
4688
4689 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
4690 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4691 }
4692
4693 // Add a NOP immediately after the branch instruction when using the 64-bit
4694 // SVR4 ABI. At link time, if caller and callee are in different modules and
4695 // thus have different TOCs, the call will be replaced with a call to a stub
4696 // function which saves the current TOC, loads the TOC of the callee and
4697 // branches to the callee. The NOP will be replaced with a load instruction
4698 // which restores the TOC of the caller from the TOC save slot of the current
4699 // stack frame. If caller and callee belong to the same module (and have the
4700 // same TOC), the NOP will remain unchanged.
4701
4702 MachineFunction &MF = DAG.getMachineFunction();
4703 if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
4704 !isPatchPoint) {
4705 if (CallOpc == PPCISD::BCTRL) {
4706 // This is a call through a function pointer.
4707 // Restore the caller TOC from the save area into R2.
4708 // See PrepareCall() for more information about calls through function
4709 // pointers in the 64-bit SVR4 ABI.
4710 // We are using a target-specific load with r2 hard coded, because the
4711 // result of a target-independent load would never go directly into r2,
4712 // since r2 is a reserved register (which prevents the register allocator
4713 // from allocating it), resulting in an additional register being
4714 // allocated and an unnecessary move instruction being generated.
4715 CallOpc = PPCISD::BCTRL_LOAD_TOC;
4716
4717 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4718 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4719 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4720 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4721 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4722
4723 // The address needs to go after the chain input but before the flag (or
4724 // any other variadic arguments).
4725 Ops.insert(std::next(Ops.begin()), AddTOC);
4726 } else if (CallOpc == PPCISD::CALL &&
4727 !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
4728 // Otherwise insert NOP for non-local calls.
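// (Illustrative only: for a cross-module call the linker rewrites the pair
//    bl callee
//    nop
// into a call through a TOC-saving stub followed by a load such as
//    ld r2, <TOC save offset>(r1)
// restoring the caller's TOC, while for a same-module callee the nop is left
// untouched.)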
4729 CallOpc = PPCISD::CALL_NOP; 4730 } 4731 } 4732 4733 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 4734 InFlag = Chain.getValue(1); 4735 4736 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4737 DAG.getIntPtrConstant(BytesCalleePops, dl, true), 4738 InFlag, dl); 4739 if (!Ins.empty()) 4740 InFlag = Chain.getValue(1); 4741 4742 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 4743 Ins, dl, DAG, InVals); 4744 } 4745 4746 SDValue 4747 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 4748 SmallVectorImpl<SDValue> &InVals) const { 4749 SelectionDAG &DAG = CLI.DAG; 4750 SDLoc &dl = CLI.DL; 4751 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 4752 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 4753 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 4754 SDValue Chain = CLI.Chain; 4755 SDValue Callee = CLI.Callee; 4756 bool &isTailCall = CLI.IsTailCall; 4757 CallingConv::ID CallConv = CLI.CallConv; 4758 bool isVarArg = CLI.IsVarArg; 4759 bool isPatchPoint = CLI.IsPatchPoint; 4760 ImmutableCallSite *CS = CLI.CS; 4761 4762 if (isTailCall) { 4763 if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall())) 4764 isTailCall = false; 4765 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) 4766 isTailCall = 4767 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, 4768 isVarArg, Outs, Ins, DAG); 4769 else 4770 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 4771 Ins, DAG); 4772 if (isTailCall) { 4773 ++NumTailCalls; 4774 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 4775 ++NumSiblingCalls; 4776 4777 assert(isa<GlobalAddressSDNode>(Callee) && 4778 "Callee should be an llvm::Function object."); 4779 DEBUG( 4780 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); 4781 const unsigned Width = 80 - strlen("TCO caller: ") 4782 - strlen(", callee linkage: 0, 0"); 4783 dbgs() << "TCO caller: " 4784 << left_justify(DAG.getMachineFunction().getName(), Width) 4785 << ", callee linkage: " 4786 << GV->getVisibility() << ", " << GV->getLinkage() << "\n" 4787 ); 4788 } 4789 } 4790 4791 if (!isTailCall && CS && CS->isMustTailCall()) 4792 report_fatal_error("failed to perform tail call elimination on a call " 4793 "site marked musttail"); 4794 4795 // When long calls (i.e. indirect calls) are always used, calls are always 4796 // made via function pointer. If we have a function name, first translate it 4797 // into a pointer. 
4798 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) && 4799 !isTailCall) 4800 Callee = LowerGlobalAddress(Callee, DAG); 4801 4802 if (Subtarget.isSVR4ABI()) { 4803 if (Subtarget.isPPC64()) 4804 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 4805 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4806 dl, DAG, InVals, CS); 4807 else 4808 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 4809 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4810 dl, DAG, InVals, CS); 4811 } 4812 4813 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 4814 isTailCall, isPatchPoint, Outs, OutVals, Ins, 4815 dl, DAG, InVals, CS); 4816 } 4817 4818 SDValue PPCTargetLowering::LowerCall_32SVR4( 4819 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 4820 bool isTailCall, bool isPatchPoint, 4821 const SmallVectorImpl<ISD::OutputArg> &Outs, 4822 const SmallVectorImpl<SDValue> &OutVals, 4823 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 4824 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 4825 ImmutableCallSite *CS) const { 4826 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 4827 // of the 32-bit SVR4 ABI stack frame layout. 4828 4829 assert((CallConv == CallingConv::C || 4830 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 4831 4832 unsigned PtrByteSize = 4; 4833 4834 MachineFunction &MF = DAG.getMachineFunction(); 4835 4836 // Mark this function as potentially containing a function that contains a 4837 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4838 // and restoring the callers stack pointer in this functions epilog. This is 4839 // done because by tail calling the called function might overwrite the value 4840 // in this function's (MF) stack pointer stack slot 0(SP). 4841 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4842 CallConv == CallingConv::Fast) 4843 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4844 4845 // Count how many bytes are to be pushed on the stack, including the linkage 4846 // area, parameter list area and the part of the local variable space which 4847 // contains copies of aggregates which are passed by value. 4848 4849 // Assign locations to all of the outgoing arguments. 4850 SmallVector<CCValAssign, 16> ArgLocs; 4851 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); 4852 4853 // Reserve space for the linkage area on the stack. 4854 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), 4855 PtrByteSize); 4856 if (useSoftFloat()) 4857 CCInfo.PreAnalyzeCallOperands(Outs); 4858 4859 if (isVarArg) { 4860 // Handle fixed and variable vector arguments differently. 4861 // Fixed vector arguments go into registers as long as registers are 4862 // available. Variable vector arguments always go into memory. 4863 unsigned NumArgs = Outs.size(); 4864 4865 for (unsigned i = 0; i != NumArgs; ++i) { 4866 MVT ArgVT = Outs[i].VT; 4867 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4868 bool Result; 4869 4870 if (Outs[i].IsFixed) { 4871 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 4872 CCInfo); 4873 } else { 4874 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 4875 ArgFlags, CCInfo); 4876 } 4877 4878 if (Result) { 4879 #ifndef NDEBUG 4880 errs() << "Call operand #" << i << " has unhandled type " 4881 << EVT(ArgVT).getEVTString() << "\n"; 4882 #endif 4883 llvm_unreachable(nullptr); 4884 } 4885 } 4886 } else { 4887 // All arguments are treated the same. 
4888 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 4889 } 4890 CCInfo.clearWasPPCF128(); 4891 4892 // Assign locations to all of the outgoing aggregate by value arguments. 4893 SmallVector<CCValAssign, 16> ByValArgLocs; 4894 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext()); 4895 4896 // Reserve stack space for the allocations in CCInfo. 4897 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 4898 4899 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 4900 4901 // Size of the linkage area, parameter list area and the part of the local 4902 // space variable where copies of aggregates which are passed by value are 4903 // stored. 4904 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 4905 4906 // Calculate by how many bytes the stack has to be adjusted in case of tail 4907 // call optimization. 4908 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4909 4910 // Adjust the stack pointer for the new arguments... 4911 // These operations are automatically eliminated by the prolog/epilog pass 4912 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 4913 dl); 4914 SDValue CallSeqStart = Chain; 4915 4916 // Load the return address and frame pointer so it can be moved somewhere else 4917 // later. 4918 SDValue LROp, FPOp; 4919 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 4920 4921 // Set up a copy of the stack pointer for use loading and storing any 4922 // arguments that may not fit in the registers available for argument 4923 // passing. 4924 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4925 4926 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4927 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4928 SmallVector<SDValue, 8> MemOpChains; 4929 4930 bool seenFloatArg = false; 4931 // Walk the register/memloc assignments, inserting copies/loads. 4932 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 4933 i != e; 4934 ++i) { 4935 CCValAssign &VA = ArgLocs[i]; 4936 SDValue Arg = OutVals[i]; 4937 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4938 4939 if (Flags.isByVal()) { 4940 // Argument is an aggregate which is passed by value, thus we need to 4941 // create a copy of it in the local variable space of the current stack 4942 // frame (which is the stack frame of the caller) and pass the address of 4943 // this copy to the callee. 4944 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 4945 CCValAssign &ByValVA = ByValArgLocs[j++]; 4946 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 4947 4948 // Memory reserved in the local variable space of the callers stack frame. 4949 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 4950 4951 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4952 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4953 StackPtr, PtrOff); 4954 4955 // Create a copy of the argument in the local area of the current 4956 // stack frame. 4957 SDValue MemcpyCall = 4958 CreateCopyOfByValArgument(Arg, PtrOff, 4959 CallSeqStart.getNode()->getOperand(0), 4960 Flags, DAG, dl); 4961 4962 // This must go outside the CALLSEQ_START..END. 
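// (Sketch of what the next statements do: the memcpy is chained onto the
// token that feeds the current CALLSEQ_START and a fresh CALLSEQ_START is
// rebuilt on top of it, roughly
//    before:  ... -> CALLSEQ_START -> argument stores -> call
//    after:   ... -> memcpy -> CALLSEQ_START -> argument stores -> call
// so the copy is sequenced before the call frame is set up instead of being
// nested inside it.)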
4963 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4964 CallSeqStart.getNode()->getOperand(1), 4965 SDLoc(MemcpyCall)); 4966 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4967 NewCallSeqStart.getNode()); 4968 Chain = CallSeqStart = NewCallSeqStart; 4969 4970 // Pass the address of the aggregate copy on the stack either in a 4971 // physical register or in the parameter list area of the current stack 4972 // frame to the callee. 4973 Arg = PtrOff; 4974 } 4975 4976 if (VA.isRegLoc()) { 4977 if (Arg.getValueType() == MVT::i1) 4978 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 4979 4980 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 4981 // Put argument in a physical register. 4982 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 4983 } else { 4984 // Put argument in the parameter list area of the current stack frame. 4985 assert(VA.isMemLoc()); 4986 unsigned LocMemOffset = VA.getLocMemOffset(); 4987 4988 if (!isTailCall) { 4989 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 4990 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), 4991 StackPtr, PtrOff); 4992 4993 MemOpChains.push_back( 4994 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); 4995 } else { 4996 // Calculate and remember argument location. 4997 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 4998 TailCallArguments); 4999 } 5000 } 5001 } 5002 5003 if (!MemOpChains.empty()) 5004 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5005 5006 // Build a sequence of copy-to-reg nodes chained together with token chain 5007 // and flag operands which copy the outgoing args into the appropriate regs. 5008 SDValue InFlag; 5009 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5010 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5011 RegsToPass[i].second, InFlag); 5012 InFlag = Chain.getValue(1); 5013 } 5014 5015 // Set CR bit 6 to true if this is a vararg call with floating args passed in 5016 // registers. 5017 if (isVarArg) { 5018 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 5019 SDValue Ops[] = { Chain, InFlag }; 5020 5021 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 5022 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); 5023 5024 InFlag = Chain.getValue(1); 5025 } 5026 5027 if (isTailCall) 5028 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 5029 TailCallArguments); 5030 5031 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 5032 /* unused except on PPC64 ELFv1 */ false, DAG, 5033 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 5034 NumBytes, Ins, InVals, CS); 5035 } 5036 5037 // Copy an argument into memory, being careful to do this outside the 5038 // call sequence for the call to which the argument belongs. 5039 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( 5040 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, 5041 SelectionDAG &DAG, const SDLoc &dl) const { 5042 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 5043 CallSeqStart.getNode()->getOperand(0), 5044 Flags, DAG, dl); 5045 // The MEMCPY must go outside the CALLSEQ_START..END. 
5046 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 5047 CallSeqStart.getNode()->getOperand(1), 5048 SDLoc(MemcpyCall)); 5049 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 5050 NewCallSeqStart.getNode()); 5051 return NewCallSeqStart; 5052 } 5053 5054 SDValue PPCTargetLowering::LowerCall_64SVR4( 5055 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 5056 bool isTailCall, bool isPatchPoint, 5057 const SmallVectorImpl<ISD::OutputArg> &Outs, 5058 const SmallVectorImpl<SDValue> &OutVals, 5059 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5060 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 5061 ImmutableCallSite *CS) const { 5062 5063 bool isELFv2ABI = Subtarget.isELFv2ABI(); 5064 bool isLittleEndian = Subtarget.isLittleEndian(); 5065 unsigned NumOps = Outs.size(); 5066 bool hasNest = false; 5067 bool IsSibCall = false; 5068 5069 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5070 unsigned PtrByteSize = 8; 5071 5072 MachineFunction &MF = DAG.getMachineFunction(); 5073 5074 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) 5075 IsSibCall = true; 5076 5077 // Mark this function as potentially containing a function that contains a 5078 // tail call. As a consequence the frame pointer will be used for dynamicalloc 5079 // and restoring the callers stack pointer in this functions epilog. This is 5080 // done because by tail calling the called function might overwrite the value 5081 // in this function's (MF) stack pointer stack slot 0(SP). 5082 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5083 CallConv == CallingConv::Fast) 5084 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5085 5086 assert(!(CallConv == CallingConv::Fast && isVarArg) && 5087 "fastcc not supported on varargs functions"); 5088 5089 // Count how many bytes are to be pushed on the stack, including the linkage 5090 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 5091 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 5092 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 5093 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 5094 unsigned NumBytes = LinkageSize; 5095 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5096 unsigned &QFPR_idx = FPR_idx; 5097 5098 static const MCPhysReg GPR[] = { 5099 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5100 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5101 }; 5102 static const MCPhysReg VR[] = { 5103 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5104 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5105 }; 5106 5107 const unsigned NumGPRs = array_lengthof(GPR); 5108 const unsigned NumFPRs = 13; 5109 const unsigned NumVRs = array_lengthof(VR); 5110 const unsigned NumQFPRs = NumFPRs; 5111 5112 // When using the fast calling convention, we don't provide backing for 5113 // arguments that will be in registers. 5114 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; 5115 5116 // Add up all the space actually used. 
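// (Worked example, assuming the ELFv1 48-byte linkage area described above:
// for a call passing (i64, double, <4 x i32>) under the default calling
// convention the loop below assigns offsets 48, 56 and -- after rounding up
// to the vector's 16-byte alignment -- 64..79, giving NumBytes = 80; the
// std::max against LinkageSize + 8 * PtrByteSize further down then raises
// this to 112 so the callee can always home the eight GPR arguments.)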
5117 for (unsigned i = 0; i != NumOps; ++i) { 5118 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5119 EVT ArgVT = Outs[i].VT; 5120 EVT OrigVT = Outs[i].ArgVT; 5121 5122 if (Flags.isNest()) 5123 continue; 5124 5125 if (CallConv == CallingConv::Fast) { 5126 if (Flags.isByVal()) 5127 NumGPRsUsed += (Flags.getByValSize()+7)/8; 5128 else 5129 switch (ArgVT.getSimpleVT().SimpleTy) { 5130 default: llvm_unreachable("Unexpected ValueType for argument!"); 5131 case MVT::i1: 5132 case MVT::i32: 5133 case MVT::i64: 5134 if (++NumGPRsUsed <= NumGPRs) 5135 continue; 5136 break; 5137 case MVT::v4i32: 5138 case MVT::v8i16: 5139 case MVT::v16i8: 5140 case MVT::v2f64: 5141 case MVT::v2i64: 5142 case MVT::v1i128: 5143 if (++NumVRsUsed <= NumVRs) 5144 continue; 5145 break; 5146 case MVT::v4f32: 5147 // When using QPX, this is handled like a FP register, otherwise, it 5148 // is an Altivec register. 5149 if (Subtarget.hasQPX()) { 5150 if (++NumFPRsUsed <= NumFPRs) 5151 continue; 5152 } else { 5153 if (++NumVRsUsed <= NumVRs) 5154 continue; 5155 } 5156 break; 5157 case MVT::f32: 5158 case MVT::f64: 5159 case MVT::v4f64: // QPX 5160 case MVT::v4i1: // QPX 5161 if (++NumFPRsUsed <= NumFPRs) 5162 continue; 5163 break; 5164 } 5165 } 5166 5167 /* Respect alignment of argument on the stack. */ 5168 unsigned Align = 5169 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5170 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 5171 5172 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5173 if (Flags.isInConsecutiveRegsLast()) 5174 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5175 } 5176 5177 unsigned NumBytesActuallyUsed = NumBytes; 5178 5179 // The prolog code of the callee may store up to 8 GPR argument registers to 5180 // the stack, allowing va_start to index over them in memory if its varargs. 5181 // Because we cannot tell if this is needed on the caller side, we have to 5182 // conservatively assume that it is needed. As such, make sure we have at 5183 // least enough stack space for the caller to store the 8 GPRs. 5184 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. 5185 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5186 5187 // Tail call needs the stack to be aligned. 5188 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5189 CallConv == CallingConv::Fast) 5190 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5191 5192 int SPDiff = 0; 5193 5194 // Calculate by how many bytes the stack has to be adjusted in case of tail 5195 // call optimization. 5196 if (!IsSibCall) 5197 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5198 5199 // To protect arguments on the stack from being clobbered in a tail call, 5200 // force all the loads to happen before doing any other lowering. 5201 if (isTailCall) 5202 Chain = DAG.getStackArgumentTokenFactor(Chain); 5203 5204 // Adjust the stack pointer for the new arguments... 5205 // These operations are automatically eliminated by the prolog/epilog pass 5206 if (!IsSibCall) 5207 Chain = DAG.getCALLSEQ_START(Chain, 5208 DAG.getIntPtrConstant(NumBytes, dl, true), dl); 5209 SDValue CallSeqStart = Chain; 5210 5211 // Load the return address and frame pointer so it can be move somewhere else 5212 // later. 
5213 SDValue LROp, FPOp; 5214 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5215 5216 // Set up a copy of the stack pointer for use loading and storing any 5217 // arguments that may not fit in the registers available for argument 5218 // passing. 5219 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5220 5221 // Figure out which arguments are going to go in registers, and which in 5222 // memory. Also, if this is a vararg function, floating point operations 5223 // must be stored to our stack, and loaded into integer regs as well, if 5224 // any integer regs are available for argument passing. 5225 unsigned ArgOffset = LinkageSize; 5226 5227 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5228 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5229 5230 SmallVector<SDValue, 8> MemOpChains; 5231 for (unsigned i = 0; i != NumOps; ++i) { 5232 SDValue Arg = OutVals[i]; 5233 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5234 EVT ArgVT = Outs[i].VT; 5235 EVT OrigVT = Outs[i].ArgVT; 5236 5237 // PtrOff will be used to store the current argument to the stack if a 5238 // register cannot be found for it. 5239 SDValue PtrOff; 5240 5241 // We re-align the argument offset for each argument, except when using the 5242 // fast calling convention, when we need to make sure we do that only when 5243 // we'll actually use a stack slot. 5244 auto ComputePtrOff = [&]() { 5245 /* Respect alignment of argument on the stack. */ 5246 unsigned Align = 5247 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 5248 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 5249 5250 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 5251 5252 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5253 }; 5254 5255 if (CallConv != CallingConv::Fast) { 5256 ComputePtrOff(); 5257 5258 /* Compute GPR index associated with argument offset. */ 5259 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 5260 GPR_idx = std::min(GPR_idx, NumGPRs); 5261 } 5262 5263 // Promote integers to 64-bit values. 5264 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 5265 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5266 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5267 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5268 } 5269 5270 // FIXME memcpy is used way more than necessary. Correctness first. 5271 // Note: "by value" is code for passing a structure by value, not 5272 // basic types. 5273 if (Flags.isByVal()) { 5274 // Note: Size includes alignment padding, so 5275 // struct x { short a; char b; } 5276 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 5277 // These are the proper values we need for right-justifying the 5278 // aggregate in a parameter register. 5279 unsigned Size = Flags.getByValSize(); 5280 5281 // An empty aggregate parameter takes up no storage and no 5282 // registers. 5283 if (Size == 0) 5284 continue; 5285 5286 if (CallConv == CallingConv::Fast) 5287 ComputePtrOff(); 5288 5289 // All aggregates smaller than 8 bytes must be passed right-justified. 5290 if (Size==1 || Size==2 || Size==4) { 5291 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? 
MVT::i16 : MVT::i32); 5292 if (GPR_idx != NumGPRs) { 5293 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5294 MachinePointerInfo(), VT); 5295 MemOpChains.push_back(Load.getValue(1)); 5296 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5297 5298 ArgOffset += PtrByteSize; 5299 continue; 5300 } 5301 } 5302 5303 if (GPR_idx == NumGPRs && Size < 8) { 5304 SDValue AddPtr = PtrOff; 5305 if (!isLittleEndian) { 5306 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5307 PtrOff.getValueType()); 5308 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5309 } 5310 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5311 CallSeqStart, 5312 Flags, DAG, dl); 5313 ArgOffset += PtrByteSize; 5314 continue; 5315 } 5316 // Copy entire object into memory. There are cases where gcc-generated 5317 // code assumes it is there, even if it could be put entirely into 5318 // registers. (This is not what the doc says.) 5319 5320 // FIXME: The above statement is likely due to a misunderstanding of the 5321 // documents. All arguments must be copied into the parameter area BY 5322 // THE CALLEE in the event that the callee takes the address of any 5323 // formal argument. That has not yet been implemented. However, it is 5324 // reasonable to use the stack area as a staging area for the register 5325 // load. 5326 5327 // Skip this for small aggregates, as we will use the same slot for a 5328 // right-justified copy, below. 5329 if (Size >= 8) 5330 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5331 CallSeqStart, 5332 Flags, DAG, dl); 5333 5334 // When a register is available, pass a small aggregate right-justified. 5335 if (Size < 8 && GPR_idx != NumGPRs) { 5336 // The easiest way to get this right-justified in a register 5337 // is to copy the structure into the rightmost portion of a 5338 // local variable slot, then load the whole slot into the 5339 // register. 5340 // FIXME: The memcpy seems to produce pretty awful code for 5341 // small aggregates, particularly for packed ones. 5342 // FIXME: It would be preferable to use the slot in the 5343 // parameter save area instead of a new local variable. 5344 SDValue AddPtr = PtrOff; 5345 if (!isLittleEndian) { 5346 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); 5347 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5348 } 5349 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5350 CallSeqStart, 5351 Flags, DAG, dl); 5352 5353 // Load the slot into the register. 5354 SDValue Load = 5355 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); 5356 MemOpChains.push_back(Load.getValue(1)); 5357 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5358 5359 // Done with this argument. 5360 ArgOffset += PtrByteSize; 5361 continue; 5362 } 5363 5364 // For aggregates larger than PtrByteSize, copy the pieces of the 5365 // object that fit into registers from the parameter save area. 
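// (Example: a 24-byte byval argument with three GPRs still free is split
// into pieces at +0, +8 and +16, each loaded into a GPR, and ArgOffset
// advances by 24; if only one GPR were free, the first piece would take it
// and the remaining 16 bytes would stay in the stack copy made above, with
// ArgOffset advanced by the same total.)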
5366 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5367 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5368 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5369 if (GPR_idx != NumGPRs) { 5370 SDValue Load = 5371 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 5372 MemOpChains.push_back(Load.getValue(1)); 5373 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5374 ArgOffset += PtrByteSize; 5375 } else { 5376 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5377 break; 5378 } 5379 } 5380 continue; 5381 } 5382 5383 switch (Arg.getSimpleValueType().SimpleTy) { 5384 default: llvm_unreachable("Unexpected ValueType for argument!"); 5385 case MVT::i1: 5386 case MVT::i32: 5387 case MVT::i64: 5388 if (Flags.isNest()) { 5389 // The 'nest' parameter, if any, is passed in R11. 5390 RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); 5391 hasNest = true; 5392 break; 5393 } 5394 5395 // These can be scalar arguments or elements of an integer array type 5396 // passed directly. Clang may use those instead of "byval" aggregate 5397 // types to avoid forcing arguments to memory unnecessarily. 5398 if (GPR_idx != NumGPRs) { 5399 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5400 } else { 5401 if (CallConv == CallingConv::Fast) 5402 ComputePtrOff(); 5403 5404 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5405 true, isTailCall, false, MemOpChains, 5406 TailCallArguments, dl); 5407 if (CallConv == CallingConv::Fast) 5408 ArgOffset += PtrByteSize; 5409 } 5410 if (CallConv != CallingConv::Fast) 5411 ArgOffset += PtrByteSize; 5412 break; 5413 case MVT::f32: 5414 case MVT::f64: { 5415 // These can be scalar arguments or elements of a float array type 5416 // passed directly. The latter are used to implement ELFv2 homogenous 5417 // float aggregates. 5418 5419 // Named arguments go into FPRs first, and once they overflow, the 5420 // remaining arguments go into GPRs and then the parameter save area. 5421 // Unnamed arguments for vararg functions always go to GPRs and 5422 // then the parameter save area. For now, put all arguments to vararg 5423 // routines always in both locations (FPR *and* GPR or stack slot). 5424 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 5425 bool NeededLoad = false; 5426 5427 // First load the argument into the next available FPR. 5428 if (FPR_idx != NumFPRs) 5429 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5430 5431 // Next, load the argument into GPR or stack slot if needed. 5432 if (!NeedGPROrStack) 5433 ; 5434 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) { 5435 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 5436 // once we support fp <-> gpr moves. 5437 5438 // In the non-vararg case, this can only ever happen in the 5439 // presence of f32 array types, since otherwise we never run 5440 // out of FPRs before running out of GPRs. 5441 SDValue ArgVal; 5442 5443 // Double values are always passed in a single GPR. 5444 if (Arg.getValueType() != MVT::f32) { 5445 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 5446 5447 // Non-array float values are extended and passed in a GPR. 5448 } else if (!Flags.isInConsecutiveRegs()) { 5449 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5450 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 5451 5452 // If we have an array of floats, we collect every odd element 5453 // together with its predecessor into one GPR. 
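// (Sketch: for a homogeneous pair of floats whose second element falls at an
// odd 4-byte offset, the first element was deliberately skipped on the
// previous iteration; here both are bitcast to i32 and combined into one i64
// with BUILD_PAIR, and on big-endian the halves are swapped so the earlier
// element ends up in the high word of the GPR.)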
5454 } else if (ArgOffset % PtrByteSize != 0) { 5455 SDValue Lo, Hi; 5456 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 5457 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5458 if (!isLittleEndian) 5459 std::swap(Lo, Hi); 5460 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 5461 5462 // The final element, if even, goes into the first half of a GPR. 5463 } else if (Flags.isInConsecutiveRegsLast()) { 5464 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 5465 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 5466 if (!isLittleEndian) 5467 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, 5468 DAG.getConstant(32, dl, MVT::i32)); 5469 5470 // Non-final even elements are skipped; they will be handled 5471 // together the with subsequent argument on the next go-around. 5472 } else 5473 ArgVal = SDValue(); 5474 5475 if (ArgVal.getNode()) 5476 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); 5477 } else { 5478 if (CallConv == CallingConv::Fast) 5479 ComputePtrOff(); 5480 5481 // Single-precision floating-point values are mapped to the 5482 // second (rightmost) word of the stack doubleword. 5483 if (Arg.getValueType() == MVT::f32 && 5484 !isLittleEndian && !Flags.isInConsecutiveRegs()) { 5485 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 5486 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5487 } 5488 5489 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5490 true, isTailCall, false, MemOpChains, 5491 TailCallArguments, dl); 5492 5493 NeededLoad = true; 5494 } 5495 // When passing an array of floats, the array occupies consecutive 5496 // space in the argument area; only round up to the next doubleword 5497 // at the end of the array. Otherwise, each float takes 8 bytes. 5498 if (CallConv != CallingConv::Fast || NeededLoad) { 5499 ArgOffset += (Arg.getValueType() == MVT::f32 && 5500 Flags.isInConsecutiveRegs()) ? 4 : 8; 5501 if (Flags.isInConsecutiveRegsLast()) 5502 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 5503 } 5504 break; 5505 } 5506 case MVT::v4f32: 5507 case MVT::v4i32: 5508 case MVT::v8i16: 5509 case MVT::v16i8: 5510 case MVT::v2f64: 5511 case MVT::v2i64: 5512 case MVT::v1i128: 5513 if (!Subtarget.hasQPX()) { 5514 // These can be scalar arguments or elements of a vector array type 5515 // passed directly. The latter are used to implement ELFv2 homogenous 5516 // vector aggregates. 5517 5518 // For a varargs call, named arguments go into VRs or on the stack as 5519 // usual; unnamed arguments always go to the stack or the corresponding 5520 // GPRs when within range. For now, we always put the value in both 5521 // locations (or even all three). 5522 if (isVarArg) { 5523 // We could elide this store in the case where the object fits 5524 // entirely in R registers. Maybe later. 
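// (In the varargs case below the vector is stored once to its 16-byte slot
// and then, while registers remain, re-loaded into the next VR and into up
// to two GPRs, so the callee can pick it up from whichever location it
// expects.)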
5525 SDValue Store = 5526 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5527 MemOpChains.push_back(Store); 5528 if (VR_idx != NumVRs) { 5529 SDValue Load = 5530 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 5531 MemOpChains.push_back(Load.getValue(1)); 5532 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5533 } 5534 ArgOffset += 16; 5535 for (unsigned i=0; i<16; i+=PtrByteSize) { 5536 if (GPR_idx == NumGPRs) 5537 break; 5538 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5539 DAG.getConstant(i, dl, PtrVT)); 5540 SDValue Load = 5541 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5542 MemOpChains.push_back(Load.getValue(1)); 5543 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5544 } 5545 break; 5546 } 5547 5548 // Non-varargs Altivec params go into VRs or on the stack. 5549 if (VR_idx != NumVRs) { 5550 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5551 } else { 5552 if (CallConv == CallingConv::Fast) 5553 ComputePtrOff(); 5554 5555 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5556 true, isTailCall, true, MemOpChains, 5557 TailCallArguments, dl); 5558 if (CallConv == CallingConv::Fast) 5559 ArgOffset += 16; 5560 } 5561 5562 if (CallConv != CallingConv::Fast) 5563 ArgOffset += 16; 5564 break; 5565 } // not QPX 5566 5567 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && 5568 "Invalid QPX parameter type"); 5569 5570 /* fall through */ 5571 case MVT::v4f64: 5572 case MVT::v4i1: { 5573 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; 5574 if (isVarArg) { 5575 // We could elide this store in the case where the object fits 5576 // entirely in R registers. Maybe later. 5577 SDValue Store = 5578 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5579 MemOpChains.push_back(Store); 5580 if (QFPR_idx != NumQFPRs) { 5581 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store, 5582 PtrOff, MachinePointerInfo()); 5583 MemOpChains.push_back(Load.getValue(1)); 5584 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); 5585 } 5586 ArgOffset += (IsF32 ? 16 : 32); 5587 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) { 5588 if (GPR_idx == NumGPRs) 5589 break; 5590 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5591 DAG.getConstant(i, dl, PtrVT)); 5592 SDValue Load = 5593 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5594 MemOpChains.push_back(Load.getValue(1)); 5595 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5596 } 5597 break; 5598 } 5599 5600 // Non-varargs QPX params go into registers or on the stack. 5601 if (QFPR_idx != NumQFPRs) { 5602 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); 5603 } else { 5604 if (CallConv == CallingConv::Fast) 5605 ComputePtrOff(); 5606 5607 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5608 true, isTailCall, true, MemOpChains, 5609 TailCallArguments, dl); 5610 if (CallConv == CallingConv::Fast) 5611 ArgOffset += (IsF32 ? 16 : 32); 5612 } 5613 5614 if (CallConv != CallingConv::Fast) 5615 ArgOffset += (IsF32 ? 16 : 32); 5616 break; 5617 } 5618 } 5619 } 5620 5621 assert(NumBytesActuallyUsed == ArgOffset); 5622 (void)NumBytesActuallyUsed; 5623 5624 if (!MemOpChains.empty()) 5625 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5626 5627 // Check if this is an indirect call (MTCTR/BCTRL). 5628 // See PrepareCall() for more information about calls through function 5629 // pointers in the 64-bit SVR4 ABI. 
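// (Sketch of the effect, with the offset supplied by the frame lowering --
// typically 40 bytes on ELFv1 and 24 on ELFv2: the store below amounts to
//    std r2, <TOC save offset>(r1)
// and pairs with the TOC reload that FinishCall arranges after the bctrl.)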
5630 if (!isTailCall && !isPatchPoint && 5631 !isFunctionGlobalAddress(Callee) && 5632 !isa<ExternalSymbolSDNode>(Callee)) { 5633 // Load r2 into a virtual register and store it to the TOC save area. 5634 setUsesTOCBasePtr(DAG); 5635 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 5636 // TOC save area offset. 5637 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); 5638 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); 5639 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5640 Chain = DAG.getStore( 5641 Val.getValue(1), dl, Val, AddPtr, 5642 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); 5643 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 5644 // This does not mean the MTCTR instruction must use R12; it's easier 5645 // to model this as an extra parameter, so do that. 5646 if (isELFv2ABI && !isPatchPoint) 5647 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 5648 } 5649 5650 // Build a sequence of copy-to-reg nodes chained together with token chain 5651 // and flag operands which copy the outgoing args into the appropriate regs. 5652 SDValue InFlag; 5653 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5654 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5655 RegsToPass[i].second, InFlag); 5656 InFlag = Chain.getValue(1); 5657 } 5658 5659 if (isTailCall && !IsSibCall) 5660 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 5661 TailCallArguments); 5662 5663 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest, 5664 DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, 5665 SPDiff, NumBytes, Ins, InVals, CS); 5666 } 5667 5668 SDValue PPCTargetLowering::LowerCall_Darwin( 5669 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, 5670 bool isTailCall, bool isPatchPoint, 5671 const SmallVectorImpl<ISD::OutputArg> &Outs, 5672 const SmallVectorImpl<SDValue> &OutVals, 5673 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 5674 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, 5675 ImmutableCallSite *CS) const { 5676 5677 unsigned NumOps = Outs.size(); 5678 5679 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5680 bool isPPC64 = PtrVT == MVT::i64; 5681 unsigned PtrByteSize = isPPC64 ? 8 : 4; 5682 5683 MachineFunction &MF = DAG.getMachineFunction(); 5684 5685 // Mark this function as potentially containing a function that contains a 5686 // tail call. As a consequence the frame pointer will be used for dynamicalloc 5687 // and restoring the callers stack pointer in this functions epilog. This is 5688 // done because by tail calling the called function might overwrite the value 5689 // in this function's (MF) stack pointer stack slot 0(SP). 5690 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5691 CallConv == CallingConv::Fast) 5692 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 5693 5694 // Count how many bytes are to be pushed on the stack, including the linkage 5695 // area, and parameter passing area. We start with 24/48 bytes, which is 5696 // prereserved space for [SP][CR][LR][3 x unused]. 5697 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); 5698 unsigned NumBytes = LinkageSize; 5699 5700 // Add up all the space actually used. 5701 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 5702 // they all go in registers, but we must reserve stack space for them for 5703 // possible use by the caller. 
In varargs or 64-bit calls, parameters are 5704 // assigned stack space in order, with padding so Altivec parameters are 5705 // 16-byte aligned. 5706 unsigned nAltivecParamsAtEnd = 0; 5707 for (unsigned i = 0; i != NumOps; ++i) { 5708 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5709 EVT ArgVT = Outs[i].VT; 5710 // Varargs Altivec parameters are padded to a 16 byte boundary. 5711 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 5712 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 5713 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 5714 if (!isVarArg && !isPPC64) { 5715 // Non-varargs Altivec parameters go after all the non-Altivec 5716 // parameters; handle those later so we know how much padding we need. 5717 nAltivecParamsAtEnd++; 5718 continue; 5719 } 5720 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 5721 NumBytes = ((NumBytes+15)/16)*16; 5722 } 5723 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 5724 } 5725 5726 // Allow for Altivec parameters at the end, if needed. 5727 if (nAltivecParamsAtEnd) { 5728 NumBytes = ((NumBytes+15)/16)*16; 5729 NumBytes += 16*nAltivecParamsAtEnd; 5730 } 5731 5732 // The prolog code of the callee may store up to 8 GPR argument registers to 5733 // the stack, allowing va_start to index over them in memory if its varargs. 5734 // Because we cannot tell if this is needed on the caller side, we have to 5735 // conservatively assume that it is needed. As such, make sure we have at 5736 // least enough stack space for the caller to store the 8 GPRs. 5737 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 5738 5739 // Tail call needs the stack to be aligned. 5740 if (getTargetMachine().Options.GuaranteedTailCallOpt && 5741 CallConv == CallingConv::Fast) 5742 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); 5743 5744 // Calculate by how many bytes the stack has to be adjusted in case of tail 5745 // call optimization. 5746 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 5747 5748 // To protect arguments on the stack from being clobbered in a tail call, 5749 // force all the loads to happen before doing any other lowering. 5750 if (isTailCall) 5751 Chain = DAG.getStackArgumentTokenFactor(Chain); 5752 5753 // Adjust the stack pointer for the new arguments... 5754 // These operations are automatically eliminated by the prolog/epilog pass 5755 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 5756 dl); 5757 SDValue CallSeqStart = Chain; 5758 5759 // Load the return address and frame pointer so it can be move somewhere else 5760 // later. 5761 SDValue LROp, FPOp; 5762 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); 5763 5764 // Set up a copy of the stack pointer for use loading and storing any 5765 // arguments that may not fit in the registers available for argument 5766 // passing. 5767 SDValue StackPtr; 5768 if (isPPC64) 5769 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 5770 else 5771 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 5772 5773 // Figure out which arguments are going to go in registers, and which in 5774 // memory. Also, if this is a vararg function, floating point operations 5775 // must be stored to our stack, and loaded into integer regs as well, if 5776 // any integer regs are available for argument passing. 5777 unsigned ArgOffset = LinkageSize; 5778 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 5779 5780 static const MCPhysReg GPR_32[] = { // 32-bit registers. 
5781 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 5782 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 5783 }; 5784 static const MCPhysReg GPR_64[] = { // 64-bit registers. 5785 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 5786 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 5787 }; 5788 static const MCPhysReg VR[] = { 5789 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 5790 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 5791 }; 5792 const unsigned NumGPRs = array_lengthof(GPR_32); 5793 const unsigned NumFPRs = 13; 5794 const unsigned NumVRs = array_lengthof(VR); 5795 5796 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 5797 5798 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 5799 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 5800 5801 SmallVector<SDValue, 8> MemOpChains; 5802 for (unsigned i = 0; i != NumOps; ++i) { 5803 SDValue Arg = OutVals[i]; 5804 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5805 5806 // PtrOff will be used to store the current argument to the stack if a 5807 // register cannot be found for it. 5808 SDValue PtrOff; 5809 5810 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); 5811 5812 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 5813 5814 // On PPC64, promote integers to 64-bit values. 5815 if (isPPC64 && Arg.getValueType() == MVT::i32) { 5816 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 5817 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 5818 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 5819 } 5820 5821 // FIXME memcpy is used way more than necessary. Correctness first. 5822 // Note: "by value" is code for passing a structure by value, not 5823 // basic types. 5824 if (Flags.isByVal()) { 5825 unsigned Size = Flags.getByValSize(); 5826 // Very small objects are passed right-justified. Everything else is 5827 // passed left-justified. 5828 if (Size==1 || Size==2) { 5829 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 5830 if (GPR_idx != NumGPRs) { 5831 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 5832 MachinePointerInfo(), VT); 5833 MemOpChains.push_back(Load.getValue(1)); 5834 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5835 5836 ArgOffset += PtrByteSize; 5837 } else { 5838 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, 5839 PtrOff.getValueType()); 5840 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 5841 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 5842 CallSeqStart, 5843 Flags, DAG, dl); 5844 ArgOffset += PtrByteSize; 5845 } 5846 continue; 5847 } 5848 // Copy entire object into memory. There are cases where gcc-generated 5849 // code assumes it is there, even if it could be put entirely into 5850 // registers. (This is not what the doc says.) 5851 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 5852 CallSeqStart, 5853 Flags, DAG, dl); 5854 5855 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 5856 // copy the pieces of the object that fit into registers from the 5857 // parameter save area. 
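// Illustrative walk-through of the loop below (not from the original
// source): for a 12-byte by-value argument on 32-bit Darwin with two GPRs
// still free, the first two 4-byte pieces are loaded into those GPRs
// (advancing ArgOffset by 4 each time); when the GPRs run out, ArgOffset is
// advanced past the remaining 4 bytes and the loop exits.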
5858 for (unsigned j=0; j<Size; j+=PtrByteSize) { 5859 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); 5860 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 5861 if (GPR_idx != NumGPRs) { 5862 SDValue Load = 5863 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); 5864 MemOpChains.push_back(Load.getValue(1)); 5865 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5866 ArgOffset += PtrByteSize; 5867 } else { 5868 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 5869 break; 5870 } 5871 } 5872 continue; 5873 } 5874 5875 switch (Arg.getSimpleValueType().SimpleTy) { 5876 default: llvm_unreachable("Unexpected ValueType for argument!"); 5877 case MVT::i1: 5878 case MVT::i32: 5879 case MVT::i64: 5880 if (GPR_idx != NumGPRs) { 5881 if (Arg.getValueType() == MVT::i1) 5882 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 5883 5884 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 5885 } else { 5886 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5887 isPPC64, isTailCall, false, MemOpChains, 5888 TailCallArguments, dl); 5889 } 5890 ArgOffset += PtrByteSize; 5891 break; 5892 case MVT::f32: 5893 case MVT::f64: 5894 if (FPR_idx != NumFPRs) { 5895 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 5896 5897 if (isVarArg) { 5898 SDValue Store = 5899 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5900 MemOpChains.push_back(Store); 5901 5902 // Float varargs are always shadowed in available integer registers 5903 if (GPR_idx != NumGPRs) { 5904 SDValue Load = 5905 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 5906 MemOpChains.push_back(Load.getValue(1)); 5907 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5908 } 5909 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 5910 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); 5911 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 5912 SDValue Load = 5913 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); 5914 MemOpChains.push_back(Load.getValue(1)); 5915 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5916 } 5917 } else { 5918 // If we have any FPRs remaining, we may also have GPRs remaining. 5919 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 5920 // GPRs. 5921 if (GPR_idx != NumGPRs) 5922 ++GPR_idx; 5923 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 5924 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 5925 ++GPR_idx; 5926 } 5927 } else 5928 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5929 isPPC64, isTailCall, false, MemOpChains, 5930 TailCallArguments, dl); 5931 if (isPPC64) 5932 ArgOffset += 8; 5933 else 5934 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 5935 break; 5936 case MVT::v4f32: 5937 case MVT::v4i32: 5938 case MVT::v8i16: 5939 case MVT::v16i8: 5940 if (isVarArg) { 5941 // These go aligned on the stack, or in the corresponding R registers 5942 // when within range. The Darwin PPC ABI doc claims they also go in 5943 // V registers; in fact gcc does this only for arguments that are 5944 // prototyped, not for those that match the ... We do it for all 5945 // arguments, seems to work. 5946 while (ArgOffset % 16 !=0) { 5947 ArgOffset += PtrByteSize; 5948 if (GPR_idx != NumGPRs) 5949 GPR_idx++; 5950 } 5951 // We could elide this store in the case where the object fits 5952 // entirely in R registers. Maybe later. 
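// Descriptive note (not original text): in the varargs case handled below,
// the 16-byte vector is stored to its 16-byte-aligned slot in the parameter
// area, reloaded into the next free VR if one remains, and also reloaded in
// pointer-sized pieces into any remaining GPRs, so the callee can pick it
// up from either place.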
5953 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 5954 DAG.getConstant(ArgOffset, dl, PtrVT)); 5955 SDValue Store = 5956 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); 5957 MemOpChains.push_back(Store); 5958 if (VR_idx != NumVRs) { 5959 SDValue Load = 5960 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); 5961 MemOpChains.push_back(Load.getValue(1)); 5962 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 5963 } 5964 ArgOffset += 16; 5965 for (unsigned i=0; i<16; i+=PtrByteSize) { 5966 if (GPR_idx == NumGPRs) 5967 break; 5968 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 5969 DAG.getConstant(i, dl, PtrVT)); 5970 SDValue Load = 5971 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); 5972 MemOpChains.push_back(Load.getValue(1)); 5973 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 5974 } 5975 break; 5976 } 5977 5978 // Non-varargs Altivec params generally go in registers, but have 5979 // stack space allocated at the end. 5980 if (VR_idx != NumVRs) { 5981 // Doesn't have GPR space allocated. 5982 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 5983 } else if (nAltivecParamsAtEnd==0) { 5984 // We are emitting Altivec params in order. 5985 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5986 isPPC64, isTailCall, true, MemOpChains, 5987 TailCallArguments, dl); 5988 ArgOffset += 16; 5989 } 5990 break; 5991 } 5992 } 5993 // If all Altivec parameters fit in registers, as they usually do, 5994 // they get stack space following the non-Altivec parameters. We 5995 // don't track this here because nobody below needs it. 5996 // If there are more Altivec parameters than fit in registers emit 5997 // the stores here. 5998 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 5999 unsigned j = 0; 6000 // Offset is aligned; skip 1st 12 params which go in V registers. 6001 ArgOffset = ((ArgOffset+15)/16)*16; 6002 ArgOffset += 12*16; 6003 for (unsigned i = 0; i != NumOps; ++i) { 6004 SDValue Arg = OutVals[i]; 6005 EVT ArgType = Outs[i].VT; 6006 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 6007 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 6008 if (++j > NumVRs) { 6009 SDValue PtrOff; 6010 // We are emitting Altivec params in order. 6011 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 6012 isPPC64, isTailCall, true, MemOpChains, 6013 TailCallArguments, dl); 6014 ArgOffset += 16; 6015 } 6016 } 6017 } 6018 } 6019 6020 if (!MemOpChains.empty()) 6021 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 6022 6023 // On Darwin, R12 must contain the address of an indirect callee. This does 6024 // not mean the MTCTR instruction must use R12; it's easier to model this as 6025 // an extra parameter, so do that. 6026 if (!isTailCall && 6027 !isFunctionGlobalAddress(Callee) && 6028 !isa<ExternalSymbolSDNode>(Callee) && 6029 !isBLACompatibleAddress(Callee, DAG)) 6030 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 6031 PPC::R12), Callee)); 6032 6033 // Build a sequence of copy-to-reg nodes chained together with token chain 6034 // and flag operands which copy the outgoing args into the appropriate regs. 
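// Rough shape of what the loop below builds (illustrative only; the dump
// notation is approximate):
//   t1: ch,glue = CopyToReg Chain, R3, arg0
//   t2: ch,glue = CopyToReg t1, R4, arg1, t1:1
// Each copy consumes the previous copy's glue result, keeping the register
// copies pinned together immediately before the call.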
6035 SDValue InFlag; 6036 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 6037 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 6038 RegsToPass[i].second, InFlag); 6039 InFlag = Chain.getValue(1); 6040 } 6041 6042 if (isTailCall) 6043 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, 6044 TailCallArguments); 6045 6046 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, 6047 /* unused except on PPC64 ELFv1 */ false, DAG, 6048 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, 6049 NumBytes, Ins, InVals, CS); 6050 } 6051 6052 bool 6053 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 6054 MachineFunction &MF, bool isVarArg, 6055 const SmallVectorImpl<ISD::OutputArg> &Outs, 6056 LLVMContext &Context) const { 6057 SmallVector<CCValAssign, 16> RVLocs; 6058 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 6059 return CCInfo.CheckReturn(Outs, RetCC_PPC); 6060 } 6061 6062 SDValue 6063 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 6064 bool isVarArg, 6065 const SmallVectorImpl<ISD::OutputArg> &Outs, 6066 const SmallVectorImpl<SDValue> &OutVals, 6067 const SDLoc &dl, SelectionDAG &DAG) const { 6068 6069 SmallVector<CCValAssign, 16> RVLocs; 6070 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 6071 *DAG.getContext()); 6072 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 6073 6074 SDValue Flag; 6075 SmallVector<SDValue, 4> RetOps(1, Chain); 6076 6077 // Copy the result values into the output registers. 6078 for (unsigned i = 0; i != RVLocs.size(); ++i) { 6079 CCValAssign &VA = RVLocs[i]; 6080 assert(VA.isRegLoc() && "Can only return in registers!"); 6081 6082 SDValue Arg = OutVals[i]; 6083 6084 switch (VA.getLocInfo()) { 6085 default: llvm_unreachable("Unknown loc info!"); 6086 case CCValAssign::Full: break; 6087 case CCValAssign::AExt: 6088 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 6089 break; 6090 case CCValAssign::ZExt: 6091 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 6092 break; 6093 case CCValAssign::SExt: 6094 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 6095 break; 6096 } 6097 6098 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 6099 Flag = Chain.getValue(1); 6100 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 6101 } 6102 6103 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6104 const MCPhysReg *I = 6105 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); 6106 if (I) { 6107 for (; *I; ++I) { 6108 6109 if (PPC::G8RCRegClass.contains(*I)) 6110 RetOps.push_back(DAG.getRegister(*I, MVT::i64)); 6111 else if (PPC::F8RCRegClass.contains(*I)) 6112 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); 6113 else if (PPC::CRRCRegClass.contains(*I)) 6114 RetOps.push_back(DAG.getRegister(*I, MVT::i1)); 6115 else if (PPC::VRRCRegClass.contains(*I)) 6116 RetOps.push_back(DAG.getRegister(*I, MVT::Other)); 6117 else 6118 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 6119 } 6120 } 6121 6122 RetOps[0] = Chain; // Update chain. 6123 6124 // Add the flag if we have it. 6125 if (Flag.getNode()) 6126 RetOps.push_back(Flag); 6127 6128 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); 6129 } 6130 6131 SDValue 6132 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, 6133 SelectionDAG &DAG) const { 6134 SDLoc dl(Op); 6135 6136 // Get the corect type for integers. 6137 EVT IntVT = Op.getValueType(); 6138 6139 // Get the inputs. 
6140   SDValue Chain = Op.getOperand(0);
6141   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6142   // Build a DYNAREAOFFSET node.
6143   SDValue Ops[2] = {Chain, FPSIdx};
6144   SDVTList VTs = DAG.getVTList(IntVT);
6145   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6146 }
6147
6148 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6149                                              SelectionDAG &DAG) const {
6150   // When we pop the dynamic allocation we need to restore the SP link.
6151   SDLoc dl(Op);
6152
6153   // Get the correct type for pointers.
6154   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6155
6156   // Construct the stack pointer operand.
6157   bool isPPC64 = Subtarget.isPPC64();
6158   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6159   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6160
6161   // Get the operands for the STACKRESTORE.
6162   SDValue Chain = Op.getOperand(0);
6163   SDValue SaveSP = Op.getOperand(1);
6164
6165   // Load the old link SP.
6166   SDValue LoadLinkSP =
6167       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6168
6169   // Restore the stack pointer.
6170   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6171
6172   // Store the old link SP.
6173   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6174 }
6175
6176 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6177   MachineFunction &MF = DAG.getMachineFunction();
6178   bool isPPC64 = Subtarget.isPPC64();
6179   EVT PtrVT = getPointerTy(MF.getDataLayout());
6180
6181   // Get the current return address save index; this is the frame index of
6182   // the link register (LR) save slot.
6183   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6184   int RASI = FI->getReturnAddrSaveIndex();
6185
6186   // If the return address save index hasn't been defined yet.
6187   if (!RASI) {
6188     // Find out the fixed offset of the return address save area.
6189     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6190     // Allocate the frame index for the return address save area.
6191     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6192     // Save the result.
6193     FI->setReturnAddrSaveIndex(RASI);
6194   }
6195   return DAG.getFrameIndex(RASI, PtrVT);
6196 }
6197
6198 SDValue
6199 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6200   MachineFunction &MF = DAG.getMachineFunction();
6201   bool isPPC64 = Subtarget.isPPC64();
6202   EVT PtrVT = getPointerTy(MF.getDataLayout());
6203
6204   // Get the current frame pointer save index. The users of this index will
6205   // be primarily DYNALLOC instructions.
6206   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6207   int FPSI = FI->getFramePointerSaveIndex();
6208
6209   // If the frame pointer save index hasn't been defined yet.
6210   if (!FPSI) {
6211     // Find out the fixed offset of the frame pointer save area.
6212     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6213     // Allocate the frame index for the frame pointer save area.
6214     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6215     // Save the result.
6216     FI->setFramePointerSaveIndex(FPSI);
6217   }
6218   return DAG.getFrameIndex(FPSI, PtrVT);
6219 }
6220
6221 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6222                                                    SelectionDAG &DAG) const {
6223   // Get the inputs.
6224   SDValue Chain = Op.getOperand(0);
6225   SDValue Size = Op.getOperand(1);
6226   SDLoc dl(Op);
6227
6228   // Get the correct type for pointers.
6229   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6230   // Negate the size.
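// Illustrative note (not from the original source): the stack grows toward
// lower addresses, so allocating N bytes means adjusting the stack pointer
// by -N; e.g. a 32-byte alloca feeds NegSize = -32 into the DYNALLOC node
// built below.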
6231 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 6232 DAG.getConstant(0, dl, PtrVT), Size); 6233 // Construct a node for the frame pointer save index. 6234 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 6235 // Build a DYNALLOC node. 6236 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 6237 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 6238 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); 6239 } 6240 6241 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op, 6242 SelectionDAG &DAG) const { 6243 MachineFunction &MF = DAG.getMachineFunction(); 6244 6245 bool isPPC64 = Subtarget.isPPC64(); 6246 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6247 6248 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false); 6249 return DAG.getFrameIndex(FI, PtrVT); 6250 } 6251 6252 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 6253 SelectionDAG &DAG) const { 6254 SDLoc DL(Op); 6255 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 6256 DAG.getVTList(MVT::i32, MVT::Other), 6257 Op.getOperand(0), Op.getOperand(1)); 6258 } 6259 6260 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 6261 SelectionDAG &DAG) const { 6262 SDLoc DL(Op); 6263 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 6264 Op.getOperand(0), Op.getOperand(1)); 6265 } 6266 6267 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 6268 if (Op.getValueType().isVector()) 6269 return LowerVectorLoad(Op, DAG); 6270 6271 assert(Op.getValueType() == MVT::i1 && 6272 "Custom lowering only for i1 loads"); 6273 6274 // First, load 8 bits into 32 bits, then truncate to 1 bit. 6275 6276 SDLoc dl(Op); 6277 LoadSDNode *LD = cast<LoadSDNode>(Op); 6278 6279 SDValue Chain = LD->getChain(); 6280 SDValue BasePtr = LD->getBasePtr(); 6281 MachineMemOperand *MMO = LD->getMemOperand(); 6282 6283 SDValue NewLD = 6284 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, 6285 BasePtr, MVT::i8, MMO); 6286 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 6287 6288 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 6289 return DAG.getMergeValues(Ops, dl); 6290 } 6291 6292 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 6293 if (Op.getOperand(1).getValueType().isVector()) 6294 return LowerVectorStore(Op, DAG); 6295 6296 assert(Op.getOperand(1).getValueType() == MVT::i1 && 6297 "Custom lowering only for i1 stores"); 6298 6299 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 6300 6301 SDLoc dl(Op); 6302 StoreSDNode *ST = cast<StoreSDNode>(Op); 6303 6304 SDValue Chain = ST->getChain(); 6305 SDValue BasePtr = ST->getBasePtr(); 6306 SDValue Value = ST->getValue(); 6307 MachineMemOperand *MMO = ST->getMemOperand(); 6308 6309 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), 6310 Value); 6311 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 6312 } 6313 6314 // FIXME: Remove this once the ANDI glue bug is fixed: 6315 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 6316 assert(Op.getValueType() == MVT::i1 && 6317 "Custom lowering only for i1 results"); 6318 6319 SDLoc DL(Op); 6320 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 6321 Op.getOperand(0)); 6322 } 6323 6324 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 6325 /// possible. 6326 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 6327 // Not FP? Not a fsel. 
6328 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 6329 !Op.getOperand(2).getValueType().isFloatingPoint()) 6330 return Op; 6331 6332 // We might be able to do better than this under some circumstances, but in 6333 // general, fsel-based lowering of select is a finite-math-only optimization. 6334 // For more information, see section F.3 of the 2.06 ISA specification. 6335 if (!DAG.getTarget().Options.NoInfsFPMath || 6336 !DAG.getTarget().Options.NoNaNsFPMath) 6337 return Op; 6338 // TODO: Propagate flags from the select rather than global settings. 6339 SDNodeFlags Flags; 6340 Flags.setNoInfs(true); 6341 Flags.setNoNaNs(true); 6342 6343 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 6344 6345 EVT ResVT = Op.getValueType(); 6346 EVT CmpVT = Op.getOperand(0).getValueType(); 6347 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6348 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 6349 SDLoc dl(Op); 6350 6351 // If the RHS of the comparison is a 0.0, we don't need to do the 6352 // subtraction at all. 6353 SDValue Sel1; 6354 if (isFloatingPointZero(RHS)) 6355 switch (CC) { 6356 default: break; // SETUO etc aren't handled by fsel. 6357 case ISD::SETNE: 6358 std::swap(TV, FV); 6359 case ISD::SETEQ: 6360 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6361 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6362 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6363 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6364 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6365 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6366 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 6367 case ISD::SETULT: 6368 case ISD::SETLT: 6369 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6370 case ISD::SETOGE: 6371 case ISD::SETGE: 6372 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6373 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6374 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 6375 case ISD::SETUGT: 6376 case ISD::SETGT: 6377 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 6378 case ISD::SETOLE: 6379 case ISD::SETLE: 6380 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 6381 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 6382 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6383 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 6384 } 6385 6386 SDValue Cmp; 6387 switch (CC) { 6388 default: break; // SETUO etc aren't handled by fsel. 
6389 case ISD::SETNE: 6390 std::swap(TV, FV); 6391 case ISD::SETEQ: 6392 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6393 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6394 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6395 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6396 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 6397 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 6398 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 6399 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 6400 case ISD::SETULT: 6401 case ISD::SETLT: 6402 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6403 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6404 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6405 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6406 case ISD::SETOGE: 6407 case ISD::SETGE: 6408 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); 6409 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6410 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6411 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6412 case ISD::SETUGT: 6413 case ISD::SETGT: 6414 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6415 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6416 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6417 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 6418 case ISD::SETOLE: 6419 case ISD::SETLE: 6420 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); 6421 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 6422 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 6423 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 6424 } 6425 return Op; 6426 } 6427 6428 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 6429 SelectionDAG &DAG, 6430 const SDLoc &dl) const { 6431 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6432 SDValue Src = Op.getOperand(0); 6433 if (Src.getValueType() == MVT::f32) 6434 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6435 6436 SDValue Tmp; 6437 switch (Op.getSimpleValueType().SimpleTy) { 6438 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6439 case MVT::i32: 6440 Tmp = DAG.getNode( 6441 Op.getOpcode() == ISD::FP_TO_SINT 6442 ? PPCISD::FCTIWZ 6443 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6444 dl, MVT::f64, Src); 6445 break; 6446 case MVT::i64: 6447 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6448 "i64 FP_TO_UINT is supported only with FPCVT"); 6449 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 6450 PPCISD::FCTIDUZ, 6451 dl, MVT::f64, Src); 6452 break; 6453 } 6454 6455 // Convert the FP value to an int value through memory. 6456 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 6457 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 6458 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 6459 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 6460 MachinePointerInfo MPI = 6461 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); 6462 6463 // Emit a store to the stack slot. 
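// Descriptive note (not original text): with STFIWX available and an i32
// result, the fctiwz value is stored directly as a 32-bit integer;
// otherwise the full f64 is stored and the integer result is re-read from
// the appropriate word of the slot below (the big-endian case needs the
// 4-byte bias mentioned there).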
6464 SDValue Chain; 6465 if (i32Stack) { 6466 MachineFunction &MF = DAG.getMachineFunction(); 6467 MachineMemOperand *MMO = 6468 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 6469 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 6470 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 6471 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 6472 } else 6473 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); 6474 6475 // Result is a load from the stack slot. If loading 4 bytes, make sure to 6476 // add in a bias on big endian. 6477 if (Op.getValueType() == MVT::i32 && !i32Stack) { 6478 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 6479 DAG.getConstant(4, dl, FIPtr.getValueType())); 6480 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); 6481 } 6482 6483 RLI.Chain = Chain; 6484 RLI.Ptr = FIPtr; 6485 RLI.MPI = MPI; 6486 } 6487 6488 /// \brief Custom lowers floating point to integer conversions to use 6489 /// the direct move instructions available in ISA 2.07 to avoid the 6490 /// need for load/store combinations. 6491 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, 6492 SelectionDAG &DAG, 6493 const SDLoc &dl) const { 6494 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 6495 SDValue Src = Op.getOperand(0); 6496 6497 if (Src.getValueType() == MVT::f32) 6498 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 6499 6500 SDValue Tmp; 6501 switch (Op.getSimpleValueType().SimpleTy) { 6502 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 6503 case MVT::i32: 6504 Tmp = DAG.getNode( 6505 Op.getOpcode() == ISD::FP_TO_SINT 6506 ? PPCISD::FCTIWZ 6507 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), 6508 dl, MVT::f64, Src); 6509 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); 6510 break; 6511 case MVT::i64: 6512 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 6513 "i64 FP_TO_UINT is supported only with FPCVT"); 6514 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 6515 PPCISD::FCTIDUZ, 6516 dl, MVT::f64, Src); 6517 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); 6518 break; 6519 } 6520 return Tmp; 6521 } 6522 6523 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 6524 const SDLoc &dl) const { 6525 if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) 6526 return LowerFP_TO_INTDirectMove(Op, DAG, dl); 6527 6528 ReuseLoadInfo RLI; 6529 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6530 6531 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, 6532 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); 6533 } 6534 6535 // We're trying to insert a regular store, S, and then a load, L. If the 6536 // incoming value, O, is a load, we might just be able to have our load use the 6537 // address used by O. However, we don't know if anything else will store to 6538 // that address before we can load from it. To prevent this situation, we need 6539 // to insert our load, L, into the chain as a peer of O. To do this, we give L 6540 // the same chain operand as O, we create a token factor from the chain results 6541 // of O and L, and we replace all uses of O's chain result with that token 6542 // factor (see spliceIntoChain below for this last part). 
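// A small picture of that splice (illustrative, not from the original
// source):
//   before:  O: ch = load ...           users of O's chain result: U1, U2
//   after:   L: ch = load ...           (L reuses O's chain operand)
//            TF = TokenFactor O's chain result, L's chain result
//            U1 and U2 now use TF instead of O's chain result.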
6543 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 6544 ReuseLoadInfo &RLI, 6545 SelectionDAG &DAG, 6546 ISD::LoadExtType ET) const { 6547 SDLoc dl(Op); 6548 if (ET == ISD::NON_EXTLOAD && 6549 (Op.getOpcode() == ISD::FP_TO_UINT || 6550 Op.getOpcode() == ISD::FP_TO_SINT) && 6551 isOperationLegalOrCustom(Op.getOpcode(), 6552 Op.getOperand(0).getValueType())) { 6553 6554 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 6555 return true; 6556 } 6557 6558 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 6559 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 6560 LD->isNonTemporal()) 6561 return false; 6562 if (LD->getMemoryVT() != MemVT) 6563 return false; 6564 6565 RLI.Ptr = LD->getBasePtr(); 6566 if (LD->isIndexed() && !LD->getOffset().isUndef()) { 6567 assert(LD->getAddressingMode() == ISD::PRE_INC && 6568 "Non-pre-inc AM on PPC?"); 6569 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 6570 LD->getOffset()); 6571 } 6572 6573 RLI.Chain = LD->getChain(); 6574 RLI.MPI = LD->getPointerInfo(); 6575 RLI.IsDereferenceable = LD->isDereferenceable(); 6576 RLI.IsInvariant = LD->isInvariant(); 6577 RLI.Alignment = LD->getAlignment(); 6578 RLI.AAInfo = LD->getAAInfo(); 6579 RLI.Ranges = LD->getRanges(); 6580 6581 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 6582 return true; 6583 } 6584 6585 // Given the head of the old chain, ResChain, insert a token factor containing 6586 // it and NewResChain, and make users of ResChain now be users of that token 6587 // factor. 6588 void PPCTargetLowering::spliceIntoChain(SDValue ResChain, 6589 SDValue NewResChain, 6590 SelectionDAG &DAG) const { 6591 if (!ResChain) 6592 return; 6593 6594 SDLoc dl(NewResChain); 6595 6596 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 6597 NewResChain, DAG.getUNDEF(MVT::Other)); 6598 assert(TF.getNode() != NewResChain.getNode() && 6599 "A new TF really is required here"); 6600 6601 DAG.ReplaceAllUsesOfValueWith(ResChain, TF); 6602 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); 6603 } 6604 6605 /// \brief Analyze profitability of direct move 6606 /// prefer float load to int load plus direct move 6607 /// when there is no integer use of int load 6608 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const { 6609 SDNode *Origin = Op.getOperand(0).getNode(); 6610 if (Origin->getOpcode() != ISD::LOAD) 6611 return true; 6612 6613 // If there is no LXSIBZX/LXSIHZX, like Power8, 6614 // prefer direct move if the memory size is 1 or 2 bytes. 6615 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand(); 6616 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2) 6617 return true; 6618 6619 for (SDNode::use_iterator UI = Origin->use_begin(), 6620 UE = Origin->use_end(); 6621 UI != UE; ++UI) { 6622 6623 // Only look at the users of the loaded value. 6624 if (UI.getUse().get().getResNo() != 0) 6625 continue; 6626 6627 if (UI->getOpcode() != ISD::SINT_TO_FP && 6628 UI->getOpcode() != ISD::UINT_TO_FP) 6629 return true; 6630 } 6631 6632 return false; 6633 } 6634 6635 /// \brief Custom lowers integer to floating point conversions to use 6636 /// the direct move instructions available in ISA 2.07 to avoid the 6637 /// need for load/store combinations. 
6638 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, 6639 SelectionDAG &DAG, 6640 const SDLoc &dl) const { 6641 assert((Op.getValueType() == MVT::f32 || 6642 Op.getValueType() == MVT::f64) && 6643 "Invalid floating point type as target of conversion"); 6644 assert(Subtarget.hasFPCVT() && 6645 "Int to FP conversions with direct moves require FPCVT"); 6646 SDValue FP; 6647 SDValue Src = Op.getOperand(0); 6648 bool SinglePrec = Op.getValueType() == MVT::f32; 6649 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; 6650 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; 6651 unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : 6652 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); 6653 6654 if (WordInt) { 6655 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, 6656 dl, MVT::f64, Src); 6657 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6658 } 6659 else { 6660 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); 6661 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); 6662 } 6663 6664 return FP; 6665 } 6666 6667 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 6668 SelectionDAG &DAG) const { 6669 SDLoc dl(Op); 6670 6671 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { 6672 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) 6673 return SDValue(); 6674 6675 SDValue Value = Op.getOperand(0); 6676 // The values are now known to be -1 (false) or 1 (true). To convert this 6677 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 6678 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 6679 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 6680 6681 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 6682 6683 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 6684 6685 if (Op.getValueType() != MVT::v4f64) 6686 Value = DAG.getNode(ISD::FP_ROUND, dl, 6687 Op.getValueType(), Value, 6688 DAG.getIntPtrConstant(1, dl)); 6689 return Value; 6690 } 6691 6692 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 6693 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 6694 return SDValue(); 6695 6696 if (Op.getOperand(0).getValueType() == MVT::i1) 6697 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 6698 DAG.getConstantFP(1.0, dl, Op.getValueType()), 6699 DAG.getConstantFP(0.0, dl, Op.getValueType())); 6700 6701 // If we have direct moves, we can do all the conversion, skip the store/load 6702 // however, without FPCVT we can't do most conversions. 6703 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && 6704 Subtarget.isPPC64() && Subtarget.hasFPCVT()) 6705 return LowerINT_TO_FPDirectMove(Op, DAG, dl); 6706 6707 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 6708 "UINT_TO_FP is supported only with FPCVT"); 6709 6710 // If we have FCFIDS, then use it when converting to single-precision. 6711 // Otherwise, convert to double-precision and then round. 6712 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6713 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 6714 : PPCISD::FCFIDS) 6715 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 6716 : PPCISD::FCFID); 6717 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 6718 ? 
MVT::f32 6719 : MVT::f64; 6720 6721 if (Op.getOperand(0).getValueType() == MVT::i64) { 6722 SDValue SINT = Op.getOperand(0); 6723 // When converting to single-precision, we actually need to convert 6724 // to double-precision first and then round to single-precision. 6725 // To avoid double-rounding effects during that operation, we have 6726 // to prepare the input operand. Bits that might be truncated when 6727 // converting to double-precision are replaced by a bit that won't 6728 // be lost at this stage, but is below the single-precision rounding 6729 // position. 6730 // 6731 // However, if -enable-unsafe-fp-math is in effect, accept double 6732 // rounding to avoid the extra overhead. 6733 if (Op.getValueType() == MVT::f32 && 6734 !Subtarget.hasFPCVT() && 6735 !DAG.getTarget().Options.UnsafeFPMath) { 6736 6737 // Twiddle input to make sure the low 11 bits are zero. (If this 6738 // is the case, we are guaranteed the value will fit into the 53 bit 6739 // mantissa of an IEEE double-precision value without rounding.) 6740 // If any of those low 11 bits were not zero originally, make sure 6741 // bit 12 (value 2048) is set instead, so that the final rounding 6742 // to single-precision gets the correct result. 6743 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6744 SINT, DAG.getConstant(2047, dl, MVT::i64)); 6745 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 6746 Round, DAG.getConstant(2047, dl, MVT::i64)); 6747 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 6748 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 6749 Round, DAG.getConstant(-2048, dl, MVT::i64)); 6750 6751 // However, we cannot use that value unconditionally: if the magnitude 6752 // of the input value is small, the bit-twiddling we did above might 6753 // end up visibly changing the output. Fortunately, in that case, we 6754 // don't need to twiddle bits since the original input will convert 6755 // exactly to double-precision floating-point already. Therefore, 6756 // construct a conditional to use the original value if the top 11 6757 // bits are all sign-bit copies, and use the rounded value computed 6758 // above otherwise. 
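// Worked example of the fix-up above (illustrative values): if
// SINT = 0x0010000000000001, the low 11 bits are 0x001, so
//   Round = (SINT & 2047) + 2047 = 0x800
//   Round |= SINT;  Round &= ~2047   ->  0x0010000000000800
// i.e. the discarded low bits are summarized by a single set bit (value
// 2048) below the single-precision rounding position, so the later
// f64->f32 rounding still produces the correct result.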
6759 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 6760 SINT, DAG.getConstant(53, dl, MVT::i32)); 6761 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 6762 Cond, DAG.getConstant(1, dl, MVT::i64)); 6763 Cond = DAG.getSetCC(dl, MVT::i32, 6764 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); 6765 6766 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 6767 } 6768 6769 ReuseLoadInfo RLI; 6770 SDValue Bits; 6771 6772 MachineFunction &MF = DAG.getMachineFunction(); 6773 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 6774 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, 6775 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); 6776 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6777 } else if (Subtarget.hasLFIWAX() && 6778 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 6779 MachineMemOperand *MMO = 6780 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6781 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6782 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6783 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 6784 DAG.getVTList(MVT::f64, MVT::Other), 6785 Ops, MVT::i32, MMO); 6786 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6787 } else if (Subtarget.hasFPCVT() && 6788 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 6789 MachineMemOperand *MMO = 6790 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6791 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6792 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6793 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 6794 DAG.getVTList(MVT::f64, MVT::Other), 6795 Ops, MVT::i32, MMO); 6796 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 6797 } else if (((Subtarget.hasLFIWAX() && 6798 SINT.getOpcode() == ISD::SIGN_EXTEND) || 6799 (Subtarget.hasFPCVT() && 6800 SINT.getOpcode() == ISD::ZERO_EXTEND)) && 6801 SINT.getOperand(0).getValueType() == MVT::i32) { 6802 MachineFrameInfo &MFI = MF.getFrameInfo(); 6803 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 6804 6805 int FrameIdx = MFI.CreateStackObject(4, 4, false); 6806 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6807 6808 SDValue Store = 6809 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 6810 MachinePointerInfo::getFixedStack( 6811 DAG.getMachineFunction(), FrameIdx)); 6812 6813 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6814 "Expected an i32 store"); 6815 6816 RLI.Ptr = FIdx; 6817 RLI.Chain = Store; 6818 RLI.MPI = 6819 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6820 RLI.Alignment = 4; 6821 6822 MachineMemOperand *MMO = 6823 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6824 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6825 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6826 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 6827 PPCISD::LFIWZX : PPCISD::LFIWAX, 6828 dl, DAG.getVTList(MVT::f64, MVT::Other), 6829 Ops, MVT::i32, MMO); 6830 } else 6831 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 6832 6833 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 6834 6835 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6836 FP = DAG.getNode(ISD::FP_ROUND, dl, 6837 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 6838 return FP; 6839 } 6840 6841 assert(Op.getOperand(0).getValueType() == MVT::i32 && 6842 "Unhandled INT_TO_FP type in custom expander!"); 6843 // Since we only generate this in 64-bit mode, we can take advantage of 6844 // 64-bit registers. 
In particular, sign extend the input value into the 6845 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 6846 // then lfd it and fcfid it. 6847 MachineFunction &MF = DAG.getMachineFunction(); 6848 MachineFrameInfo &MFI = MF.getFrameInfo(); 6849 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6850 6851 SDValue Ld; 6852 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 6853 ReuseLoadInfo RLI; 6854 bool ReusingLoad; 6855 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, 6856 DAG))) { 6857 int FrameIdx = MFI.CreateStackObject(4, 4, false); 6858 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6859 6860 SDValue Store = 6861 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 6862 MachinePointerInfo::getFixedStack( 6863 DAG.getMachineFunction(), FrameIdx)); 6864 6865 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 6866 "Expected an i32 store"); 6867 6868 RLI.Ptr = FIdx; 6869 RLI.Chain = Store; 6870 RLI.MPI = 6871 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 6872 RLI.Alignment = 4; 6873 } 6874 6875 MachineMemOperand *MMO = 6876 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 6877 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 6878 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 6879 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 6880 PPCISD::LFIWZX : PPCISD::LFIWAX, 6881 dl, DAG.getVTList(MVT::f64, MVT::Other), 6882 Ops, MVT::i32, MMO); 6883 if (ReusingLoad) 6884 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); 6885 } else { 6886 assert(Subtarget.isPPC64() && 6887 "i32->FP without LFIWAX supported only on PPC64"); 6888 6889 int FrameIdx = MFI.CreateStackObject(8, 8, false); 6890 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6891 6892 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 6893 Op.getOperand(0)); 6894 6895 // STD the extended value into the stack slot. 6896 SDValue Store = DAG.getStore( 6897 DAG.getEntryNode(), dl, Ext64, FIdx, 6898 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 6899 6900 // Load the value as a double. 6901 Ld = DAG.getLoad( 6902 MVT::f64, dl, Store, FIdx, 6903 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); 6904 } 6905 6906 // FCFID it and return it. 
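// (Descriptive note, not original text: FCFOp/FCFTy were chosen earlier, so
// with FPCVT and an f32 destination this emits FCFIDS/FCFIDUS directly;
// without FPCVT the conversion is done in f64 and the FP_ROUND below
// narrows the result to f32.)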
6907 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 6908 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 6909 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, 6910 DAG.getIntPtrConstant(0, dl)); 6911 return FP; 6912 } 6913 6914 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 6915 SelectionDAG &DAG) const { 6916 SDLoc dl(Op); 6917 /* 6918 The rounding mode is in bits 30:31 of FPSR, and has the following 6919 settings: 6920 00 Round to nearest 6921 01 Round to 0 6922 10 Round to +inf 6923 11 Round to -inf 6924 6925 FLT_ROUNDS, on the other hand, expects the following: 6926 -1 Undefined 6927 0 Round to 0 6928 1 Round to nearest 6929 2 Round to +inf 6930 3 Round to -inf 6931 6932 To perform the conversion, we do: 6933 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 6934 */ 6935 6936 MachineFunction &MF = DAG.getMachineFunction(); 6937 EVT VT = Op.getValueType(); 6938 EVT PtrVT = getPointerTy(MF.getDataLayout()); 6939 6940 // Save FP Control Word to register 6941 EVT NodeTys[] = { 6942 MVT::f64, // return register 6943 MVT::Glue // unused in this context 6944 }; 6945 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); 6946 6947 // Save FP register to stack slot 6948 int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false); 6949 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 6950 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, 6951 MachinePointerInfo()); 6952 6953 // Load FP Control Word from low 32 bits of stack slot. 6954 SDValue Four = DAG.getConstant(4, dl, PtrVT); 6955 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 6956 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); 6957 6958 // Transform as necessary 6959 SDValue CWD1 = 6960 DAG.getNode(ISD::AND, dl, MVT::i32, 6961 CWD, DAG.getConstant(3, dl, MVT::i32)); 6962 SDValue CWD2 = 6963 DAG.getNode(ISD::SRL, dl, MVT::i32, 6964 DAG.getNode(ISD::AND, dl, MVT::i32, 6965 DAG.getNode(ISD::XOR, dl, MVT::i32, 6966 CWD, DAG.getConstant(3, dl, MVT::i32)), 6967 DAG.getConstant(3, dl, MVT::i32)), 6968 DAG.getConstant(1, dl, MVT::i32)); 6969 6970 SDValue RetVal = 6971 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 6972 6973 return DAG.getNode((VT.getSizeInBits() < 16 ? 6974 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 6975 } 6976 6977 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 6978 EVT VT = Op.getValueType(); 6979 unsigned BitWidth = VT.getSizeInBits(); 6980 SDLoc dl(Op); 6981 assert(Op.getNumOperands() == 3 && 6982 VT == Op.getOperand(1).getValueType() && 6983 "Unexpected SHL!"); 6984 6985 // Expand into a bunch of logical ops. Note that these ops 6986 // depend on the PPC behavior for oversized shift amounts. 
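// Sketch of the expansion below (illustrative, not from the original
// source), relying on PPC shifts yielding 0 for amounts >= BitWidth:
//   OutLo = Lo << Amt
//   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
// e.g. with BitWidth = 32 and Amt = 40, the first two OutHi terms are 0 and
// the third is Lo << 8, while OutLo = Lo << 40 = 0, exactly a 64-bit left
// shift by 40.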
6987 SDValue Lo = Op.getOperand(0); 6988 SDValue Hi = Op.getOperand(1); 6989 SDValue Amt = Op.getOperand(2); 6990 EVT AmtVT = Amt.getValueType(); 6991 6992 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 6993 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 6994 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 6995 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 6996 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 6997 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 6998 DAG.getConstant(-BitWidth, dl, AmtVT)); 6999 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 7000 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 7001 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 7002 SDValue OutOps[] = { OutLo, OutHi }; 7003 return DAG.getMergeValues(OutOps, dl); 7004 } 7005 7006 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 7007 EVT VT = Op.getValueType(); 7008 SDLoc dl(Op); 7009 unsigned BitWidth = VT.getSizeInBits(); 7010 assert(Op.getNumOperands() == 3 && 7011 VT == Op.getOperand(1).getValueType() && 7012 "Unexpected SRL!"); 7013 7014 // Expand into a bunch of logical ops. Note that these ops 7015 // depend on the PPC behavior for oversized shift amounts. 7016 SDValue Lo = Op.getOperand(0); 7017 SDValue Hi = Op.getOperand(1); 7018 SDValue Amt = Op.getOperand(2); 7019 EVT AmtVT = Amt.getValueType(); 7020 7021 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7022 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7023 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 7024 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 7025 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 7026 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7027 DAG.getConstant(-BitWidth, dl, AmtVT)); 7028 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 7029 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 7030 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 7031 SDValue OutOps[] = { OutLo, OutHi }; 7032 return DAG.getMergeValues(OutOps, dl); 7033 } 7034 7035 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 7036 SDLoc dl(Op); 7037 EVT VT = Op.getValueType(); 7038 unsigned BitWidth = VT.getSizeInBits(); 7039 assert(Op.getNumOperands() == 3 && 7040 VT == Op.getOperand(1).getValueType() && 7041 "Unexpected SRA!"); 7042 7043 // Expand into a bunch of logical ops, followed by a select_cc. 7044 SDValue Lo = Op.getOperand(0); 7045 SDValue Hi = Op.getOperand(1); 7046 SDValue Amt = Op.getOperand(2); 7047 EVT AmtVT = Amt.getValueType(); 7048 7049 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 7050 DAG.getConstant(BitWidth, dl, AmtVT), Amt); 7051 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 7052 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 7053 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 7054 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 7055 DAG.getConstant(-BitWidth, dl, AmtVT)); 7056 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 7057 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 7058 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), 7059 Tmp4, Tmp6, ISD::SETLE); 7060 SDValue OutOps[] = { OutLo, OutHi }; 7061 return DAG.getMergeValues(OutOps, dl); 7062 } 7063 7064 //===----------------------------------------------------------------------===// 7065 // Vector related lowering. 
7066 // 7067 7068 /// BuildSplatI - Build a canonical splati of Val with an element size of 7069 /// SplatSize. Cast the result to VT. 7070 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 7071 SelectionDAG &DAG, const SDLoc &dl) { 7072 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 7073 7074 static const MVT VTys[] = { // canonical VT to use for each size. 7075 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 7076 }; 7077 7078 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 7079 7080 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 7081 if (Val == -1) 7082 SplatSize = 1; 7083 7084 EVT CanonicalVT = VTys[SplatSize-1]; 7085 7086 // Build a canonical splat for this value. 7087 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); 7088 } 7089 7090 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 7091 /// specified intrinsic ID. 7092 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, 7093 const SDLoc &dl, EVT DestVT = MVT::Other) { 7094 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 7095 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7096 DAG.getConstant(IID, dl, MVT::i32), Op); 7097 } 7098 7099 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 7100 /// specified intrinsic ID. 7101 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 7102 SelectionDAG &DAG, const SDLoc &dl, 7103 EVT DestVT = MVT::Other) { 7104 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 7105 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7106 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); 7107 } 7108 7109 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 7110 /// specified intrinsic ID. 7111 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 7112 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, 7113 EVT DestVT = MVT::Other) { 7114 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 7115 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 7116 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); 7117 } 7118 7119 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 7120 /// amount. The result has the specified value type. 7121 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, 7122 SelectionDAG &DAG, const SDLoc &dl) { 7123 // Force LHS/RHS to be the right type. 7124 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 7125 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 7126 7127 int Ops[16]; 7128 for (unsigned i = 0; i != 16; ++i) 7129 Ops[i] = i + Amt; 7130 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 7131 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7132 } 7133 7134 /// Do we have an efficient pattern in a .td file for this node? 7135 /// 7136 /// \param V - pointer to the BuildVectorSDNode being matched 7137 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves? 7138 /// 7139 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR 7140 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where 7141 /// the opposite is true (expansion is beneficial) are: 7142 /// - The node builds a vector out of integers that are not 32 or 64-bits 7143 /// - The node builds a vector out of constants 7144 /// - The node is a "load-and-splat" 7145 /// In all other cases, we will choose to keep the BUILD_VECTOR. 
7146 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, 7147 bool HasDirectMove) { 7148 EVT VecVT = V->getValueType(0); 7149 bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 || 7150 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32)); 7151 if (!RightType) 7152 return false; 7153 7154 bool IsSplat = true; 7155 bool IsLoad = false; 7156 SDValue Op0 = V->getOperand(0); 7157 7158 // This function is called in a block that confirms the node is not a constant 7159 // splat. So a constant BUILD_VECTOR here means the vector is built out of 7160 // different constants. 7161 if (V->isConstant()) 7162 return false; 7163 for (int i = 0, e = V->getNumOperands(); i < e; ++i) { 7164 if (V->getOperand(i).isUndef()) 7165 return false; 7166 // We want to expand nodes that represent load-and-splat even if the 7167 // loaded value is a floating point truncation or conversion to int. 7168 if (V->getOperand(i).getOpcode() == ISD::LOAD || 7169 (V->getOperand(i).getOpcode() == ISD::FP_ROUND && 7170 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || 7171 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT && 7172 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || 7173 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT && 7174 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD)) 7175 IsLoad = true; 7176 // If the operands are different or the input is not a load and has more 7177 // uses than just this BV node, then it isn't a splat. 7178 if (V->getOperand(i) != Op0 || 7179 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode()))) 7180 IsSplat = false; 7181 } 7182 return !(IsSplat && IsLoad); 7183 } 7184 7185 // If this is a case we can't handle, return null and let the default 7186 // expansion code take care of it. If we CAN select this case, and if it 7187 // selects to a single instruction, return Op. Otherwise, if we can codegen 7188 // this case more efficiently than a constant pool load, lower it to the 7189 // sequence of ops that should be used. 7190 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 7191 SelectionDAG &DAG) const { 7192 SDLoc dl(Op); 7193 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 7194 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 7195 7196 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { 7197 // We first build an i32 vector, load it into a QPX register, 7198 // then convert it to a floating-point vector and compare it 7199 // to a zero vector to get the boolean result. 
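// (Descriptive note, not original text: the all-constant case below avoids
// the stack round-trip and instead loads a constant-pool vector of +/-1.0
// floats through QVLFSb, using -1.0 for false and 1.0 for true, the same
// v4i1 encoding noted in LowerINT_TO_FP above.)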
7200 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 7201 int FrameIdx = MFI.CreateStackObject(16, 16, false); 7202 MachinePointerInfo PtrInfo = 7203 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 7204 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 7205 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 7206 7207 assert(BVN->getNumOperands() == 4 && 7208 "BUILD_VECTOR for v4i1 does not have 4 operands"); 7209 7210 bool IsConst = true; 7211 for (unsigned i = 0; i < 4; ++i) { 7212 if (BVN->getOperand(i).isUndef()) continue; 7213 if (!isa<ConstantSDNode>(BVN->getOperand(i))) { 7214 IsConst = false; 7215 break; 7216 } 7217 } 7218 7219 if (IsConst) { 7220 Constant *One = 7221 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); 7222 Constant *NegOne = 7223 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); 7224 7225 Constant *CV[4]; 7226 for (unsigned i = 0; i < 4; ++i) { 7227 if (BVN->getOperand(i).isUndef()) 7228 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); 7229 else if (isNullConstant(BVN->getOperand(i))) 7230 CV[i] = NegOne; 7231 else 7232 CV[i] = One; 7233 } 7234 7235 Constant *CP = ConstantVector::get(CV); 7236 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), 7237 16 /* alignment */); 7238 7239 SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; 7240 SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other}); 7241 return DAG.getMemIntrinsicNode( 7242 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, 7243 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 7244 } 7245 7246 SmallVector<SDValue, 4> Stores; 7247 for (unsigned i = 0; i < 4; ++i) { 7248 if (BVN->getOperand(i).isUndef()) continue; 7249 7250 unsigned Offset = 4*i; 7251 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 7252 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 7253 7254 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); 7255 if (StoreSize > 4) { 7256 Stores.push_back( 7257 DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx, 7258 PtrInfo.getWithOffset(Offset), MVT::i32)); 7259 } else { 7260 SDValue StoreValue = BVN->getOperand(i); 7261 if (StoreSize < 4) 7262 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); 7263 7264 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx, 7265 PtrInfo.getWithOffset(Offset))); 7266 } 7267 } 7268 7269 SDValue StoreChain; 7270 if (!Stores.empty()) 7271 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 7272 else 7273 StoreChain = DAG.getEntryNode(); 7274 7275 // Now load from v4i32 into the QPX register; this will extend it to 7276 // v4i64 but not yet convert it to a floating point. Nevertheless, this 7277 // is typed as v4f64 because the QPX register integer states are not 7278 // explicitly represented. 7279 7280 SDValue Ops[] = {StoreChain, 7281 DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32), 7282 FIdx}; 7283 SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other}); 7284 7285 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, 7286 dl, VTs, Ops, MVT::v4i32, PtrInfo); 7287 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 7288 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), 7289 LoadedVect); 7290 7291 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); 7292 7293 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); 7294 } 7295 7296 // All other QPX vectors are handled by generic code. 
7297 if (Subtarget.hasQPX()) 7298 return SDValue(); 7299 7300 // Check if this is a splat of a constant value. 7301 APInt APSplatBits, APSplatUndef; 7302 unsigned SplatBitSize; 7303 bool HasAnyUndefs; 7304 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 7305 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || 7306 SplatBitSize > 32) { 7307 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be 7308 // lowered to VSX instructions under certain conditions. 7309 // Without VSX, there is no pattern more efficient than expanding the node. 7310 if (Subtarget.hasVSX() && 7311 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove())) 7312 return Op; 7313 return SDValue(); 7314 } 7315 7316 unsigned SplatBits = APSplatBits.getZExtValue(); 7317 unsigned SplatUndef = APSplatUndef.getZExtValue(); 7318 unsigned SplatSize = SplatBitSize / 8; 7319 7320 // First, handle single instruction cases. 7321 7322 // All zeros? 7323 if (SplatBits == 0) { 7324 // Canonicalize all zero vectors to be v4i32. 7325 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 7326 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); 7327 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 7328 } 7329 return Op; 7330 } 7331 7332 // We have XXSPLTIB for constant splats one byte wide 7333 if (Subtarget.hasP9Vector() && SplatSize == 1) { 7334 // This is a splat of 1-byte elements with some elements potentially undef. 7335 // Rather than trying to match undef in the SDAG patterns, ensure that all 7336 // elements are the same constant. 7337 if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) { 7338 SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits, 7339 dl, MVT::i32)); 7340 SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops); 7341 if (Op.getValueType() != MVT::v16i8) 7342 return DAG.getBitcast(Op.getValueType(), NewBV); 7343 return NewBV; 7344 } 7345 return Op; 7346 } 7347 7348 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 7349 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 7350 (32-SplatBitSize)); 7351 if (SextVal >= -16 && SextVal <= 15) 7352 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 7353 7354 // Two instruction sequences. 7355 7356 // If this value is in the range [-32,30] and is even, use: 7357 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 7358 // If this value is in the range [17,31] and is odd, use: 7359 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 7360 // If this value is in the range [-31,-17] and is odd, use: 7361 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 7362 // Note the last two are three-instruction sequences. 7363 if (SextVal >= -32 && SextVal <= 31) { 7364 // To avoid having these optimizations undone by constant folding, 7365 // we convert to a pseudo that will be expanded later into one of 7366 // the above forms. 7367 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); 7368 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 7369 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); 7370 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); 7371 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 7372 if (VT == Op.getValueType()) 7373 return RetVal; 7374 else 7375 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 7376 } 7377 7378 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 7379 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 7380 // for fneg/fabs. 
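// For example: vspltisw V, -1 puts 0xFFFFFFFF in every word; vslw V, V, V
// then shifts each word left by 31 (only the low 5 bits of the shift count
// are used), giving 0x8000_0000 per lane, and the final xor with the
// all-ones vector turns that into 0x7FFF_FFFF.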
7381 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 7382 // Make -1 and vspltisw -1: 7383 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 7384 7385 // Make the VSLW intrinsic, computing 0x8000_0000. 7386 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 7387 OnesV, DAG, dl); 7388 7389 // xor by OnesV to invert it. 7390 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 7391 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7392 } 7393 7394 // Check to see if this is a wide variety of vsplti*, binop self cases. 7395 static const signed char SplatCsts[] = { 7396 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 7397 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 7398 }; 7399 7400 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 7401 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 7402 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 7403 int i = SplatCsts[idx]; 7404 7405 // Figure out what shift amount will be used by altivec if shifted by i in 7406 // this splat size. 7407 unsigned TypeShiftAmt = i & (SplatBitSize-1); 7408 7409 // vsplti + shl self. 7410 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 7411 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7412 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7413 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 7414 Intrinsic::ppc_altivec_vslw 7415 }; 7416 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7417 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7418 } 7419 7420 // vsplti + srl self. 7421 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 7422 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7423 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7424 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 7425 Intrinsic::ppc_altivec_vsrw 7426 }; 7427 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7428 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7429 } 7430 7431 // vsplti + sra self. 7432 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 7433 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7434 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7435 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 7436 Intrinsic::ppc_altivec_vsraw 7437 }; 7438 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7439 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7440 } 7441 7442 // vsplti + rol self. 7443 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 7444 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 7445 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 7446 static const unsigned IIDs[] = { // Intrinsic to use for each size. 7447 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 7448 Intrinsic::ppc_altivec_vrlw 7449 }; 7450 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 7451 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 7452 } 7453 7454 // t = vsplti c, result = vsldoi t, t, 1 7455 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 7456 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7457 unsigned Amt = Subtarget.isLittleEndian() ? 
15 : 1; 7458 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7459 } 7460 // t = vsplti c, result = vsldoi t, t, 2 7461 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 7462 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7463 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; 7464 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7465 } 7466 // t = vsplti c, result = vsldoi t, t, 3 7467 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 7468 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 7469 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; 7470 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); 7471 } 7472 } 7473 7474 return SDValue(); 7475 } 7476 7477 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 7478 /// the specified operations to build the shuffle. 7479 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 7480 SDValue RHS, SelectionDAG &DAG, 7481 const SDLoc &dl) { 7482 unsigned OpNum = (PFEntry >> 26) & 0x0F; 7483 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 7484 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 7485 7486 enum { 7487 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 7488 OP_VMRGHW, 7489 OP_VMRGLW, 7490 OP_VSPLTISW0, 7491 OP_VSPLTISW1, 7492 OP_VSPLTISW2, 7493 OP_VSPLTISW3, 7494 OP_VSLDOI4, 7495 OP_VSLDOI8, 7496 OP_VSLDOI12 7497 }; 7498 7499 if (OpNum == OP_COPY) { 7500 if (LHSID == (1*9+2)*9+3) return LHS; 7501 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 7502 return RHS; 7503 } 7504 7505 SDValue OpLHS, OpRHS; 7506 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 7507 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 7508 7509 int ShufIdxs[16]; 7510 switch (OpNum) { 7511 default: llvm_unreachable("Unknown i32 permute!"); 7512 case OP_VMRGHW: 7513 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 7514 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 7515 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 7516 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 7517 break; 7518 case OP_VMRGLW: 7519 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 7520 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 7521 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 7522 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 7523 break; 7524 case OP_VSPLTISW0: 7525 for (unsigned i = 0; i != 16; ++i) 7526 ShufIdxs[i] = (i&3)+0; 7527 break; 7528 case OP_VSPLTISW1: 7529 for (unsigned i = 0; i != 16; ++i) 7530 ShufIdxs[i] = (i&3)+4; 7531 break; 7532 case OP_VSPLTISW2: 7533 for (unsigned i = 0; i != 16; ++i) 7534 ShufIdxs[i] = (i&3)+8; 7535 break; 7536 case OP_VSPLTISW3: 7537 for (unsigned i = 0; i != 16; ++i) 7538 ShufIdxs[i] = (i&3)+12; 7539 break; 7540 case OP_VSLDOI4: 7541 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 7542 case OP_VSLDOI8: 7543 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 7544 case OP_VSLDOI12: 7545 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 7546 } 7547 EVT VT = OpLHS.getValueType(); 7548 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 7549 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 7550 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, 
OpRHS, ShufIdxs); 7551 return DAG.getNode(ISD::BITCAST, dl, VT, T); 7552 } 7553 7554 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 7555 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 7556 /// return the code it can be lowered into. Worst case, it can always be 7557 /// lowered into a vperm. 7558 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 7559 SelectionDAG &DAG) const { 7560 SDLoc dl(Op); 7561 SDValue V1 = Op.getOperand(0); 7562 SDValue V2 = Op.getOperand(1); 7563 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 7564 EVT VT = Op.getValueType(); 7565 bool isLittleEndian = Subtarget.isLittleEndian(); 7566 7567 unsigned ShiftElts, InsertAtByte; 7568 bool Swap; 7569 if (Subtarget.hasP9Vector() && 7570 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, 7571 isLittleEndian)) { 7572 if (Swap) 7573 std::swap(V1, V2); 7574 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 7575 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); 7576 if (ShiftElts) { 7577 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, 7578 DAG.getConstant(ShiftElts, dl, MVT::i32)); 7579 SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl, 7580 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 7581 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 7582 } 7583 SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2, 7584 DAG.getConstant(InsertAtByte, dl, MVT::i32)); 7585 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); 7586 } 7587 7588 if (Subtarget.hasVSX()) { 7589 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { 7590 int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); 7591 7592 // If the source for the shuffle is a scalar_to_vector that came from a 7593 // 32-bit load, it will have used LXVWSX so we don't need to splat again. 7594 if (Subtarget.hasP9Vector() && 7595 ((isLittleEndian && SplatIdx == 3) || 7596 (!isLittleEndian && SplatIdx == 0))) { 7597 SDValue Src = V1.getOperand(0); 7598 if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR && 7599 Src.getOperand(0).getOpcode() == ISD::LOAD && 7600 Src.getOperand(0).hasOneUse()) 7601 return V1; 7602 } 7603 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); 7604 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, 7605 DAG.getConstant(SplatIdx, dl, MVT::i32)); 7606 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); 7607 } 7608 7609 // Left shifts of 8 bytes are actually swaps. Convert accordingly. 7610 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { 7611 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); 7612 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); 7613 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); 7614 } 7615 7616 } 7617 7618 if (Subtarget.hasQPX()) { 7619 if (VT.getVectorNumElements() != 4) 7620 return SDValue(); 7621 7622 if (V2.isUndef()) V2 = V1; 7623 7624 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); 7625 if (AlignIdx != -1) { 7626 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, 7627 DAG.getConstant(AlignIdx, dl, MVT::i32)); 7628 } else if (SVOp->isSplat()) { 7629 int SplatIdx = SVOp->getSplatIndex(); 7630 if (SplatIdx >= 4) { 7631 std::swap(V1, V2); 7632 SplatIdx -= 4; 7633 } 7634 7635 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, 7636 DAG.getConstant(SplatIdx, dl, MVT::i32)); 7637 } 7638 7639 // Lower this into a qvgpci/qvfperm pair. 
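// The qvgpci control packs the four selected source lanes into a 12-bit
// immediate, three bits per lane, most-significant lane first; e.g. the
// identity mask <0,1,2,3> encodes as 0b000'001'010'011 == 0x053.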
7640 7641 // Compute the qvgpci literal 7642 unsigned idx = 0; 7643 for (unsigned i = 0; i < 4; ++i) { 7644 int m = SVOp->getMaskElt(i); 7645 unsigned mm = m >= 0 ? (unsigned) m : i; 7646 idx |= mm << (3-i)*3; 7647 } 7648 7649 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, 7650 DAG.getConstant(idx, dl, MVT::i32)); 7651 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); 7652 } 7653 7654 // Cases that are handled by instructions that take permute immediates 7655 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 7656 // selected by the instruction selector. 7657 if (V2.isUndef()) { 7658 if (PPC::isSplatShuffleMask(SVOp, 1) || 7659 PPC::isSplatShuffleMask(SVOp, 2) || 7660 PPC::isSplatShuffleMask(SVOp, 4) || 7661 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 7662 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 7663 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 7664 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 7665 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 7666 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 7667 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 7668 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 7669 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || 7670 (Subtarget.hasP8Altivec() && ( 7671 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || 7672 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || 7673 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { 7674 return Op; 7675 } 7676 } 7677 7678 // Altivec has a variety of "shuffle immediates" that take two vector inputs 7679 // and produce a fixed permutation. If any of these match, do not lower to 7680 // VPERM. 7681 unsigned int ShuffleKind = isLittleEndian ? 2 : 0; 7682 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 7683 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 7684 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 7685 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7686 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7687 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7688 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 7689 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 7690 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || 7691 (Subtarget.hasP8Altivec() && ( 7692 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || 7693 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || 7694 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) 7695 return Op; 7696 7697 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 7698 // perfect shuffle table to emit an optimal matching sequence. 7699 ArrayRef<int> PermMask = SVOp->getMask(); 7700 7701 unsigned PFIndexes[4]; 7702 bool isFourElementShuffle = true; 7703 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 7704 unsigned EltNo = 8; // Start out undef. 7705 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 7706 if (PermMask[i*4+j] < 0) 7707 continue; // Undef, ignore it. 7708 7709 unsigned ByteSource = PermMask[i*4+j]; 7710 if ((ByteSource & 3) != j) { 7711 isFourElementShuffle = false; 7712 break; 7713 } 7714 7715 if (EltNo == 8) { 7716 EltNo = ByteSource/4; 7717 } else if (EltNo != ByteSource/4) { 7718 isFourElementShuffle = false; 7719 break; 7720 } 7721 } 7722 PFIndexes[i] = EltNo; 7723 } 7724 7725 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 7726 // perfect shuffle vector to determine if it is cost effective to do this as 7727 // discrete instructions, or whether we should use a vperm. 
7728 // For now, we skip this for little endian until such time as we have a 7729 // little-endian perfect shuffle table. 7730 if (isFourElementShuffle && !isLittleEndian) { 7731 // Compute the index in the perfect shuffle table. 7732 unsigned PFTableIndex = 7733 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 7734 7735 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 7736 unsigned Cost = (PFEntry >> 30); 7737 7738 // Determining when to avoid vperm is tricky. Many things affect the cost 7739 // of vperm, particularly how many times the perm mask needs to be computed. 7740 // For example, if the perm mask can be hoisted out of a loop or is already 7741 // used (perhaps because there are multiple permutes with the same shuffle 7742 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 7743 // the loop requires an extra register. 7744 // 7745 // As a compromise, we only emit discrete instructions if the shuffle can be 7746 // generated in 3 or fewer operations. When we have loop information 7747 // available, if this block is within a loop, we should avoid using vperm 7748 // for 3-operation perms and use a constant pool load instead. 7749 if (Cost < 3) 7750 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 7751 } 7752 7753 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 7754 // vector that will get spilled to the constant pool. 7755 if (V2.isUndef()) V2 = V1; 7756 7757 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 7758 // that it is in input element units, not in bytes. Convert now. 7759 7760 // For little endian, the order of the input vectors is reversed, and 7761 // the permutation mask is complemented with respect to 31. This is 7762 // necessary to produce proper semantics with the big-endian-biased vperm 7763 // instruction. 7764 EVT EltVT = V1.getValueType().getVectorElementType(); 7765 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 7766 7767 SmallVector<SDValue, 16> ResultMask; 7768 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 7769 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 7770 7771 for (unsigned j = 0; j != BytesPerElement; ++j) 7772 if (isLittleEndian) 7773 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), 7774 dl, MVT::i32)); 7775 else 7776 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, 7777 MVT::i32)); 7778 } 7779 7780 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); 7781 if (isLittleEndian) 7782 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7783 V2, V1, VPermMask); 7784 else 7785 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 7786 V1, V2, VPermMask); 7787 } 7788 7789 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a 7790 /// vector comparison. If it is, return true and fill in Opc/isDot with 7791 /// information about the intrinsic. 7792 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, 7793 bool &isDot, const PPCSubtarget &Subtarget) { 7794 unsigned IntrinsicID = 7795 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 7796 CompareOpc = -1; 7797 isDot = false; 7798 switch (IntrinsicID) { 7799 default: return false; 7800 // Comparison predicates. 
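// The CompareOpc values below correspond to the extended-opcode fields of
// the matching vcmp*/xvcmp* instructions; isDot selects the record ("dot")
// form, which also sets CR6 for the *_p predicate intrinsics.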
7801 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 7802 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 7803 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 7804 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 7805 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 7806 case Intrinsic::ppc_altivec_vcmpequd_p: 7807 if (Subtarget.hasP8Altivec()) { 7808 CompareOpc = 199; 7809 isDot = 1; 7810 } else 7811 return false; 7812 7813 break; 7814 case Intrinsic::ppc_altivec_vcmpneb_p: 7815 case Intrinsic::ppc_altivec_vcmpneh_p: 7816 case Intrinsic::ppc_altivec_vcmpnew_p: 7817 case Intrinsic::ppc_altivec_vcmpnezb_p: 7818 case Intrinsic::ppc_altivec_vcmpnezh_p: 7819 case Intrinsic::ppc_altivec_vcmpnezw_p: 7820 if (Subtarget.hasP9Altivec()) { 7821 switch(IntrinsicID) { 7822 default: llvm_unreachable("Unknown comparison intrinsic."); 7823 case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break; 7824 case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break; 7825 case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break; 7826 case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break; 7827 case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break; 7828 case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break; 7829 } 7830 isDot = 1; 7831 } else 7832 return false; 7833 7834 break; 7835 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 7836 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 7837 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 7838 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 7839 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 7840 case Intrinsic::ppc_altivec_vcmpgtsd_p: 7841 if (Subtarget.hasP8Altivec()) { 7842 CompareOpc = 967; 7843 isDot = 1; 7844 } else 7845 return false; 7846 7847 break; 7848 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 7849 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 7850 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 7851 case Intrinsic::ppc_altivec_vcmpgtud_p: 7852 if (Subtarget.hasP8Altivec()) { 7853 CompareOpc = 711; 7854 isDot = 1; 7855 } else 7856 return false; 7857 7858 break; 7859 // VSX predicate comparisons use the same infrastructure 7860 case Intrinsic::ppc_vsx_xvcmpeqdp_p: 7861 case Intrinsic::ppc_vsx_xvcmpgedp_p: 7862 case Intrinsic::ppc_vsx_xvcmpgtdp_p: 7863 case Intrinsic::ppc_vsx_xvcmpeqsp_p: 7864 case Intrinsic::ppc_vsx_xvcmpgesp_p: 7865 case Intrinsic::ppc_vsx_xvcmpgtsp_p: 7866 if (Subtarget.hasVSX()) { 7867 switch (IntrinsicID) { 7868 case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; 7869 case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; 7870 case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; 7871 case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; 7872 case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; 7873 case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; 7874 } 7875 isDot = 1; 7876 } 7877 else 7878 return false; 7879 7880 break; 7881 7882 // Normal Comparisons. 
7883 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 7884 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 7885 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 7886 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 7887 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 7888 case Intrinsic::ppc_altivec_vcmpequd: 7889 if (Subtarget.hasP8Altivec()) { 7890 CompareOpc = 199; 7891 isDot = 0; 7892 } else 7893 return false; 7894 7895 break; 7896 case Intrinsic::ppc_altivec_vcmpneb: 7897 case Intrinsic::ppc_altivec_vcmpneh: 7898 case Intrinsic::ppc_altivec_vcmpnew: 7899 case Intrinsic::ppc_altivec_vcmpnezb: 7900 case Intrinsic::ppc_altivec_vcmpnezh: 7901 case Intrinsic::ppc_altivec_vcmpnezw: 7902 if (Subtarget.hasP9Altivec()) { 7903 switch (IntrinsicID) { 7904 default: llvm_unreachable("Unknown comparison intrinsic."); 7905 case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break; 7906 case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break; 7907 case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break; 7908 case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break; 7909 case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break; 7910 case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break; 7911 } 7912 isDot = 0; 7913 } else 7914 return false; 7915 break; 7916 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 7917 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 7918 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 7919 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 7920 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 7921 case Intrinsic::ppc_altivec_vcmpgtsd: 7922 if (Subtarget.hasP8Altivec()) { 7923 CompareOpc = 967; 7924 isDot = 0; 7925 } else 7926 return false; 7927 7928 break; 7929 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 7930 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 7931 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 7932 case Intrinsic::ppc_altivec_vcmpgtud: 7933 if (Subtarget.hasP8Altivec()) { 7934 CompareOpc = 711; 7935 isDot = 0; 7936 } else 7937 return false; 7938 7939 break; 7940 } 7941 return true; 7942 } 7943 7944 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 7945 /// lower, do it, otherwise return null. 7946 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 7947 SelectionDAG &DAG) const { 7948 unsigned IntrinsicID = 7949 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 7950 7951 if (IntrinsicID == Intrinsic::thread_pointer) { 7952 // Reads the thread pointer register, used for __builtin_thread_pointer. 7953 bool is64bit = Subtarget.isPPC64(); 7954 return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 7955 is64bit ? MVT::i64 : MVT::i32); 7956 } 7957 7958 // If this is a lowered altivec predicate compare, CompareOpc is set to the 7959 // opcode number of the comparison. 7960 SDLoc dl(Op); 7961 int CompareOpc; 7962 bool isDot; 7963 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) 7964 return SDValue(); // Don't custom lower most intrinsics. 7965 7966 // If this is a non-dot comparison, make the VCMP node and we are done. 
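// (A non-dot compare just yields the per-lane result vector; a dot compare
// additionally sets CR6, which is read back via MFOCRF and unpacked below.)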
7967 if (!isDot) {
7968 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
7969 Op.getOperand(1), Op.getOperand(2),
7970 DAG.getConstant(CompareOpc, dl, MVT::i32));
7971 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
7972 }
7973
7974 // Create the PPCISD altivec 'dot' comparison node.
7975 SDValue Ops[] = {
7976 Op.getOperand(2), // LHS
7977 Op.getOperand(3), // RHS
7978 DAG.getConstant(CompareOpc, dl, MVT::i32)
7979 };
7980 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
7981 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
7982
7983 // Now that we have the comparison, emit a copy from the CR to a GPR.
7984 // This is flagged to the above dot comparison.
7985 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
7986 DAG.getRegister(PPC::CR6, MVT::i32),
7987 CompNode.getValue(1));
7988
7989 // Unpack the result based on how the target uses it.
7990 unsigned BitNo; // Bit # of CR6.
7991 bool InvertBit; // Invert result?
7992 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
7993 default: // Can't happen, don't crash on invalid number though.
7994 case 0: // Return the value of the EQ bit of CR6.
7995 BitNo = 0; InvertBit = false;
7996 break;
7997 case 1: // Return the inverted value of the EQ bit of CR6.
7998 BitNo = 0; InvertBit = true;
7999 break;
8000 case 2: // Return the value of the LT bit of CR6.
8001 BitNo = 2; InvertBit = false;
8002 break;
8003 case 3: // Return the inverted value of the LT bit of CR6.
8004 BitNo = 2; InvertBit = true;
8005 break;
8006 }
8007
8008 // Shift the bit into the low position.
8009 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
8010 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
8011 // Isolate the bit.
8012 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
8013 DAG.getConstant(1, dl, MVT::i32));
8014
8015 // If we are supposed to, toggle the bit.
8016 if (InvertBit)
8017 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
8018 DAG.getConstant(1, dl, MVT::i32));
8019 return Flags;
8020 }
8021
8022 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
8023 SelectionDAG &DAG) const {
8024 SDLoc dl(Op);
8025 // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
8026 // instructions), but for smaller types, we need to first extend up to v2i32
8027 // before going any farther.
8028 if (Op.getValueType() == MVT::v2i64) {
8029 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
8030 if (ExtVT != MVT::v2i32) {
8031 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
8032 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
8033 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
8034 ExtVT.getVectorElementType(), 4)));
8035 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
8036 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
8037 DAG.getValueType(MVT::v2i32));
8038 }
8039
8040 return Op;
8041 }
8042
8043 return SDValue();
8044 }
8045
8046 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
8047 SelectionDAG &DAG) const {
8048 SDLoc dl(Op);
8049 // Create a stack slot that is 16-byte aligned.
8050 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8051 int FrameIdx = MFI.CreateStackObject(16, 16, false);
8052 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8053 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8054
8055 // Store the input value into Value#0 of the stack slot.
8056 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 8057 MachinePointerInfo()); 8058 // Load it out. 8059 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo()); 8060 } 8061 8062 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 8063 SelectionDAG &DAG) const { 8064 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && 8065 "Should only be called for ISD::INSERT_VECTOR_ELT"); 8066 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 8067 // We have legal lowering for constant indices but not for variable ones. 8068 if (C) 8069 return Op; 8070 return SDValue(); 8071 } 8072 8073 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 8074 SelectionDAG &DAG) const { 8075 SDLoc dl(Op); 8076 SDNode *N = Op.getNode(); 8077 8078 assert(N->getOperand(0).getValueType() == MVT::v4i1 && 8079 "Unknown extract_vector_elt type"); 8080 8081 SDValue Value = N->getOperand(0); 8082 8083 // The first part of this is like the store lowering except that we don't 8084 // need to track the chain. 8085 8086 // The values are now known to be -1 (false) or 1 (true). To convert this 8087 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 8088 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 8089 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 8090 8091 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 8092 // understand how to form the extending load. 8093 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 8094 8095 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 8096 8097 // Now convert to an integer and store. 8098 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 8099 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 8100 Value); 8101 8102 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 8103 int FrameIdx = MFI.CreateStackObject(16, 16, false); 8104 MachinePointerInfo PtrInfo = 8105 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 8106 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8107 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8108 8109 SDValue StoreChain = DAG.getEntryNode(); 8110 SDValue Ops[] = {StoreChain, 8111 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 8112 Value, FIdx}; 8113 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 8114 8115 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 8116 dl, VTs, Ops, MVT::v4i32, PtrInfo); 8117 8118 // Extract the value requested. 
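// Each lane was stored as an i32, so element N lives at byte offset 4*N of
// the stack slot (e.g. extracting element 2 reads the word at offset 8).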
8119 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 8120 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 8121 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 8122 8123 SDValue IntVal = 8124 DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); 8125 8126 if (!Subtarget.useCRBits()) 8127 return IntVal; 8128 8129 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); 8130 } 8131 8132 /// Lowering for QPX v4i1 loads 8133 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, 8134 SelectionDAG &DAG) const { 8135 SDLoc dl(Op); 8136 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); 8137 SDValue LoadChain = LN->getChain(); 8138 SDValue BasePtr = LN->getBasePtr(); 8139 8140 if (Op.getValueType() == MVT::v4f64 || 8141 Op.getValueType() == MVT::v4f32) { 8142 EVT MemVT = LN->getMemoryVT(); 8143 unsigned Alignment = LN->getAlignment(); 8144 8145 // If this load is properly aligned, then it is legal. 8146 if (Alignment >= MemVT.getStoreSize()) 8147 return Op; 8148 8149 EVT ScalarVT = Op.getValueType().getScalarType(), 8150 ScalarMemVT = MemVT.getScalarType(); 8151 unsigned Stride = ScalarMemVT.getStoreSize(); 8152 8153 SDValue Vals[4], LoadChains[4]; 8154 for (unsigned Idx = 0; Idx < 4; ++Idx) { 8155 SDValue Load; 8156 if (ScalarVT != ScalarMemVT) 8157 Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, 8158 BasePtr, 8159 LN->getPointerInfo().getWithOffset(Idx * Stride), 8160 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 8161 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 8162 else 8163 Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, 8164 LN->getPointerInfo().getWithOffset(Idx * Stride), 8165 MinAlign(Alignment, Idx * Stride), 8166 LN->getMemOperand()->getFlags(), LN->getAAInfo()); 8167 8168 if (Idx == 0 && LN->isIndexed()) { 8169 assert(LN->getAddressingMode() == ISD::PRE_INC && 8170 "Unknown addressing mode on vector load"); 8171 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), 8172 LN->getAddressingMode()); 8173 } 8174 8175 Vals[Idx] = Load; 8176 LoadChains[Idx] = Load.getValue(1); 8177 8178 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 8179 DAG.getConstant(Stride, dl, 8180 BasePtr.getValueType())); 8181 } 8182 8183 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8184 SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals); 8185 8186 if (LN->isIndexed()) { 8187 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; 8188 return DAG.getMergeValues(RetOps, dl); 8189 } 8190 8191 SDValue RetOps[] = { Value, TF }; 8192 return DAG.getMergeValues(RetOps, dl); 8193 } 8194 8195 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); 8196 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); 8197 8198 // To lower v4i1 from a byte array, we load the byte elements of the 8199 // vector and then reuse the BUILD_VECTOR logic. 
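// That is: four i8 extending loads at byte offsets 0..3, tied together with
// a TokenFactor, feeding a v4i1 BUILD_VECTOR (see the loop below).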
8200 8201 SDValue VectElmts[4], VectElmtChains[4]; 8202 for (unsigned i = 0; i < 4; ++i) { 8203 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 8204 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 8205 8206 VectElmts[i] = DAG.getExtLoad( 8207 ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx, 8208 LN->getPointerInfo().getWithOffset(i), MVT::i8, 8209 /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo()); 8210 VectElmtChains[i] = VectElmts[i].getValue(1); 8211 } 8212 8213 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); 8214 SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts); 8215 8216 SDValue RVals[] = { Value, LoadChain }; 8217 return DAG.getMergeValues(RVals, dl); 8218 } 8219 8220 /// Lowering for QPX v4i1 stores 8221 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, 8222 SelectionDAG &DAG) const { 8223 SDLoc dl(Op); 8224 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode()); 8225 SDValue StoreChain = SN->getChain(); 8226 SDValue BasePtr = SN->getBasePtr(); 8227 SDValue Value = SN->getValue(); 8228 8229 if (Value.getValueType() == MVT::v4f64 || 8230 Value.getValueType() == MVT::v4f32) { 8231 EVT MemVT = SN->getMemoryVT(); 8232 unsigned Alignment = SN->getAlignment(); 8233 8234 // If this store is properly aligned, then it is legal. 8235 if (Alignment >= MemVT.getStoreSize()) 8236 return Op; 8237 8238 EVT ScalarVT = Value.getValueType().getScalarType(), 8239 ScalarMemVT = MemVT.getScalarType(); 8240 unsigned Stride = ScalarMemVT.getStoreSize(); 8241 8242 SDValue Stores[4]; 8243 for (unsigned Idx = 0; Idx < 4; ++Idx) { 8244 SDValue Ex = DAG.getNode( 8245 ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, 8246 DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); 8247 SDValue Store; 8248 if (ScalarVT != ScalarMemVT) 8249 Store = 8250 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, 8251 SN->getPointerInfo().getWithOffset(Idx * Stride), 8252 ScalarMemVT, MinAlign(Alignment, Idx * Stride), 8253 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 8254 else 8255 Store = DAG.getStore(StoreChain, dl, Ex, BasePtr, 8256 SN->getPointerInfo().getWithOffset(Idx * Stride), 8257 MinAlign(Alignment, Idx * Stride), 8258 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 8259 8260 if (Idx == 0 && SN->isIndexed()) { 8261 assert(SN->getAddressingMode() == ISD::PRE_INC && 8262 "Unknown addressing mode on vector store"); 8263 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), 8264 SN->getAddressingMode()); 8265 } 8266 8267 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 8268 DAG.getConstant(Stride, dl, 8269 BasePtr.getValueType())); 8270 Stores[Idx] = Store; 8271 } 8272 8273 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8274 8275 if (SN->isIndexed()) { 8276 SDValue RetOps[] = { TF, Stores[0].getValue(1) }; 8277 return DAG.getMergeValues(RetOps, dl); 8278 } 8279 8280 return TF; 8281 } 8282 8283 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); 8284 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); 8285 8286 // The values are now known to be -1 (false) or 1 (true). To convert this 8287 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 
8288 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 8289 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); 8290 8291 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to 8292 // understand how to form the extending load. 8293 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); 8294 8295 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 8296 8297 // Now convert to an integer and store. 8298 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, 8299 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), 8300 Value); 8301 8302 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 8303 int FrameIdx = MFI.CreateStackObject(16, 16, false); 8304 MachinePointerInfo PtrInfo = 8305 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); 8306 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8307 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 8308 8309 SDValue Ops[] = {StoreChain, 8310 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), 8311 Value, FIdx}; 8312 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); 8313 8314 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, 8315 dl, VTs, Ops, MVT::v4i32, PtrInfo); 8316 8317 // Move data into the byte array. 8318 SDValue Loads[4], LoadChains[4]; 8319 for (unsigned i = 0; i < 4; ++i) { 8320 unsigned Offset = 4*i; 8321 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); 8322 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); 8323 8324 Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, 8325 PtrInfo.getWithOffset(Offset)); 8326 LoadChains[i] = Loads[i].getValue(1); 8327 } 8328 8329 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8330 8331 SDValue Stores[4]; 8332 for (unsigned i = 0; i < 4; ++i) { 8333 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); 8334 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); 8335 8336 Stores[i] = DAG.getTruncStore( 8337 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), 8338 MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), 8339 SN->getAAInfo()); 8340 } 8341 8342 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 8343 8344 return StoreChain; 8345 } 8346 8347 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 8348 SDLoc dl(Op); 8349 if (Op.getValueType() == MVT::v4i32) { 8350 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8351 8352 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 8353 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 8354 8355 SDValue RHSSwap = // = vrlw RHS, 16 8356 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 8357 8358 // Shrinkify inputs to v8i16. 8359 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 8360 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 8361 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 8362 8363 // Low parts multiplied together, generating 32-bit results (we ignore the 8364 // top parts). 8365 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 8366 LHS, RHS, DAG, dl, MVT::v4i32); 8367 8368 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 8369 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 8370 // Shift the high parts up 16 bits. 
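// At this point LoProd holds lo16(LHS)*lo16(RHS) for each word and HiProd
// holds the sum of the two cross products lo16(LHS)*hi16(RHS) +
// hi16(LHS)*lo16(RHS); shifting HiProd left by 16 and adding LoProd
// reconstructs the full 32-bit product modulo 2^32.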
8371 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 8372 Neg16, DAG, dl); 8373 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 8374 } else if (Op.getValueType() == MVT::v8i16) { 8375 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8376 8377 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 8378 8379 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 8380 LHS, RHS, Zero, DAG, dl); 8381 } else if (Op.getValueType() == MVT::v16i8) { 8382 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 8383 bool isLittleEndian = Subtarget.isLittleEndian(); 8384 8385 // Multiply the even 8-bit parts, producing 16-bit sums. 8386 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 8387 LHS, RHS, DAG, dl, MVT::v8i16); 8388 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 8389 8390 // Multiply the odd 8-bit parts, producing 16-bit sums. 8391 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 8392 LHS, RHS, DAG, dl, MVT::v8i16); 8393 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 8394 8395 // Merge the results together. Because vmuleub and vmuloub are 8396 // instructions with a big-endian bias, we must reverse the 8397 // element numbering and reverse the meaning of "odd" and "even" 8398 // when generating little endian code. 8399 int Ops[16]; 8400 for (unsigned i = 0; i != 8; ++i) { 8401 if (isLittleEndian) { 8402 Ops[i*2 ] = 2*i; 8403 Ops[i*2+1] = 2*i+16; 8404 } else { 8405 Ops[i*2 ] = 2*i+1; 8406 Ops[i*2+1] = 2*i+1+16; 8407 } 8408 } 8409 if (isLittleEndian) 8410 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); 8411 else 8412 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 8413 } else { 8414 llvm_unreachable("Unknown mul to lower!"); 8415 } 8416 } 8417 8418 /// LowerOperation - Provide custom lowering hooks for some operations. 
8419 /// 8420 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 8421 switch (Op.getOpcode()) { 8422 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 8423 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 8424 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 8425 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 8426 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 8427 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 8428 case ISD::SETCC: return LowerSETCC(Op, DAG); 8429 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 8430 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 8431 case ISD::VASTART: 8432 return LowerVASTART(Op, DAG); 8433 8434 case ISD::VAARG: 8435 return LowerVAARG(Op, DAG); 8436 8437 case ISD::VACOPY: 8438 return LowerVACOPY(Op, DAG); 8439 8440 case ISD::STACKRESTORE: 8441 return LowerSTACKRESTORE(Op, DAG); 8442 8443 case ISD::DYNAMIC_STACKALLOC: 8444 return LowerDYNAMIC_STACKALLOC(Op, DAG); 8445 8446 case ISD::GET_DYNAMIC_AREA_OFFSET: 8447 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); 8448 8449 case ISD::EH_DWARF_CFA: 8450 return LowerEH_DWARF_CFA(Op, DAG); 8451 8452 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 8453 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 8454 8455 case ISD::LOAD: return LowerLOAD(Op, DAG); 8456 case ISD::STORE: return LowerSTORE(Op, DAG); 8457 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); 8458 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 8459 case ISD::FP_TO_UINT: 8460 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 8461 SDLoc(Op)); 8462 case ISD::UINT_TO_FP: 8463 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 8464 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 8465 8466 // Lower 64-bit shifts. 8467 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 8468 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 8469 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 8470 8471 // Vector-related lowering. 8472 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 8473 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 8474 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 8475 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 8476 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 8477 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 8478 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 8479 case ISD::MUL: return LowerMUL(Op, DAG); 8480 8481 // For counter-based loop handling. 8482 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 8483 8484 // Frame & Return address. 
8485 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 8486 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 8487 } 8488 } 8489 8490 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 8491 SmallVectorImpl<SDValue>&Results, 8492 SelectionDAG &DAG) const { 8493 SDLoc dl(N); 8494 switch (N->getOpcode()) { 8495 default: 8496 llvm_unreachable("Do not know how to custom type legalize this operation!"); 8497 case ISD::READCYCLECOUNTER: { 8498 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 8499 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); 8500 8501 Results.push_back(RTB); 8502 Results.push_back(RTB.getValue(1)); 8503 Results.push_back(RTB.getValue(2)); 8504 break; 8505 } 8506 case ISD::INTRINSIC_W_CHAIN: { 8507 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 8508 Intrinsic::ppc_is_decremented_ctr_nonzero) 8509 break; 8510 8511 assert(N->getValueType(0) == MVT::i1 && 8512 "Unexpected result type for CTR decrement intrinsic"); 8513 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), 8514 N->getValueType(0)); 8515 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 8516 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 8517 N->getOperand(1)); 8518 8519 Results.push_back(NewInt); 8520 Results.push_back(NewInt.getValue(1)); 8521 break; 8522 } 8523 case ISD::VAARG: { 8524 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) 8525 return; 8526 8527 EVT VT = N->getValueType(0); 8528 8529 if (VT == MVT::i64) { 8530 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); 8531 8532 Results.push_back(NewNode); 8533 Results.push_back(NewNode.getValue(1)); 8534 } 8535 return; 8536 } 8537 case ISD::FP_ROUND_INREG: { 8538 assert(N->getValueType(0) == MVT::ppcf128); 8539 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 8540 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8541 MVT::f64, N->getOperand(0), 8542 DAG.getIntPtrConstant(0, dl)); 8543 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 8544 MVT::f64, N->getOperand(0), 8545 DAG.getIntPtrConstant(1, dl)); 8546 8547 // Add the two halves of the long double in round-to-zero mode. 8548 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 8549 8550 // We know the low half is about to be thrown away, so just use something 8551 // convenient. 8552 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 8553 FPreg, FPreg)); 8554 return; 8555 } 8556 case ISD::FP_TO_SINT: 8557 case ISD::FP_TO_UINT: 8558 // LowerFP_TO_INT() can only handle f32 and f64. 
8559 if (N->getOperand(0).getValueType() == MVT::ppcf128)
8560 return;
8561 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
8562 return;
8563 }
8564 }
8565
8566 //===----------------------------------------------------------------------===//
8567 // Other Lowering Code
8568 //===----------------------------------------------------------------------===//
8569
8570 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
8571 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
8572 Function *Func = Intrinsic::getDeclaration(M, Id);
8573 return Builder.CreateCall(Func, {});
8574 }
8575
8576 // The mappings for emitLeading/TrailingFence are taken from
8577 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
8578 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
8579 AtomicOrdering Ord, bool IsStore,
8580 bool IsLoad) const {
8581 if (Ord == AtomicOrdering::SequentiallyConsistent)
8582 return callIntrinsic(Builder, Intrinsic::ppc_sync);
8583 if (isReleaseOrStronger(Ord))
8584 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8585 return nullptr;
8586 }
8587
8588 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
8589 AtomicOrdering Ord, bool IsStore,
8590 bool IsLoad) const {
8591 if (IsLoad && isAcquireOrStronger(Ord))
8592 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8593 // FIXME: this is too conservative, a dependent branch + isync is enough.
8594 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
8595 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
8596 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
8597 return nullptr;
8598 }
8599
8600 MachineBasicBlock *
8601 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
8602 unsigned AtomicSize,
8603 unsigned BinOpcode,
8604 unsigned CmpOpcode,
8605 unsigned CmpPred) const {
8606 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8607 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8608
8609 auto LoadMnemonic = PPC::LDARX;
8610 auto StoreMnemonic = PPC::STDCX;
8611 switch (AtomicSize) {
8612 default:
8613 llvm_unreachable("Unexpected size of atomic entity");
8614 case 1:
8615 LoadMnemonic = PPC::LBARX;
8616 StoreMnemonic = PPC::STBCX;
8617 assert(Subtarget.hasPartwordAtomics() && "Call this with size 1/2 only when partword atomics are available");
8618 break;
8619 case 2:
8620 LoadMnemonic = PPC::LHARX;
8621 StoreMnemonic = PPC::STHCX;
8622 assert(Subtarget.hasPartwordAtomics() && "Call this with size 1/2 only when partword atomics are available");
8623 break;
8624 case 4:
8625 LoadMnemonic = PPC::LWARX;
8626 StoreMnemonic = PPC::STWCX;
8627 break;
8628 case 8:
8629 LoadMnemonic = PPC::LDARX;
8630 StoreMnemonic = PPC::STDCX;
8631 break;
8632 }
8633
8634 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8635 MachineFunction *F = BB->getParent();
8636 MachineFunction::iterator It = ++BB->getIterator();
8637
8638 unsigned dest = MI.getOperand(0).getReg();
8639 unsigned ptrA = MI.getOperand(1).getReg();
8640 unsigned ptrB = MI.getOperand(2).getReg();
8641 unsigned incr = MI.getOperand(3).getReg();
8642 DebugLoc dl = MI.getDebugLoc();
8643
8644 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8645 MachineBasicBlock *loop2MBB =
8646 CmpOpcode ?
F->CreateMachineBasicBlock(LLVM_BB) : nullptr; 8647 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8648 F->insert(It, loopMBB); 8649 if (CmpOpcode) 8650 F->insert(It, loop2MBB); 8651 F->insert(It, exitMBB); 8652 exitMBB->splice(exitMBB->begin(), BB, 8653 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8654 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8655 8656 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8657 unsigned TmpReg = (!BinOpcode) ? incr : 8658 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass 8659 : &PPC::GPRCRegClass); 8660 8661 // thisMBB: 8662 // ... 8663 // fallthrough --> loopMBB 8664 BB->addSuccessor(loopMBB); 8665 8666 // loopMBB: 8667 // l[wd]arx dest, ptr 8668 // add r0, dest, incr 8669 // st[wd]cx. r0, ptr 8670 // bne- loopMBB 8671 // fallthrough --> exitMBB 8672 8673 // For max/min... 8674 // loopMBB: 8675 // l[wd]arx dest, ptr 8676 // cmpl?[wd] incr, dest 8677 // bgt exitMBB 8678 // loop2MBB: 8679 // st[wd]cx. dest, ptr 8680 // bne- loopMBB 8681 // fallthrough --> exitMBB 8682 8683 BB = loopMBB; 8684 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 8685 .addReg(ptrA).addReg(ptrB); 8686 if (BinOpcode) 8687 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 8688 if (CmpOpcode) { 8689 // Signed comparisons of byte or halfword values must be sign-extended. 8690 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { 8691 unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 8692 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), 8693 ExtReg).addReg(dest); 8694 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 8695 .addReg(incr).addReg(ExtReg); 8696 } else 8697 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 8698 .addReg(incr).addReg(dest); 8699 8700 BuildMI(BB, dl, TII->get(PPC::BCC)) 8701 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); 8702 BB->addSuccessor(loop2MBB); 8703 BB->addSuccessor(exitMBB); 8704 BB = loop2MBB; 8705 } 8706 BuildMI(BB, dl, TII->get(StoreMnemonic)) 8707 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 8708 BuildMI(BB, dl, TII->get(PPC::BCC)) 8709 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 8710 BB->addSuccessor(loopMBB); 8711 BB->addSuccessor(exitMBB); 8712 8713 // exitMBB: 8714 // ... 8715 BB = exitMBB; 8716 return BB; 8717 } 8718 8719 MachineBasicBlock * 8720 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, 8721 MachineBasicBlock *BB, 8722 bool is8bit, // operation 8723 unsigned BinOpcode, 8724 unsigned CmpOpcode, 8725 unsigned CmpPred) const { 8726 // If we support part-word atomic mnemonics, just use them 8727 if (Subtarget.hasPartwordAtomics()) 8728 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, 8729 CmpOpcode, CmpPred); 8730 8731 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 8732 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8733 // In 64 bit mode we have to use 64 bits for addresses, even though the 8734 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 8735 // registers without caring whether they're 32 or 64, but here we're 8736 // doing actual arithmetic on the addresses. 8737 bool is64bit = Subtarget.isPPC64(); 8738 bool isLittleEndian = Subtarget.isLittleEndian(); 8739 unsigned ZeroReg = is64bit ? 
PPC::ZERO8 : PPC::ZERO; 8740 8741 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 8742 MachineFunction *F = BB->getParent(); 8743 MachineFunction::iterator It = ++BB->getIterator(); 8744 8745 unsigned dest = MI.getOperand(0).getReg(); 8746 unsigned ptrA = MI.getOperand(1).getReg(); 8747 unsigned ptrB = MI.getOperand(2).getReg(); 8748 unsigned incr = MI.getOperand(3).getReg(); 8749 DebugLoc dl = MI.getDebugLoc(); 8750 8751 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 8752 MachineBasicBlock *loop2MBB = 8753 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; 8754 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 8755 F->insert(It, loopMBB); 8756 if (CmpOpcode) 8757 F->insert(It, loop2MBB); 8758 F->insert(It, exitMBB); 8759 exitMBB->splice(exitMBB->begin(), BB, 8760 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 8761 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 8762 8763 MachineRegisterInfo &RegInfo = F->getRegInfo(); 8764 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 8765 : &PPC::GPRCRegClass; 8766 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 8767 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 8768 unsigned ShiftReg = 8769 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); 8770 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 8771 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 8772 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 8773 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 8774 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 8775 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 8776 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 8777 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 8778 unsigned Ptr1Reg; 8779 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 8780 8781 // thisMBB: 8782 // ... 8783 // fallthrough --> loopMBB 8784 BB->addSuccessor(loopMBB); 8785 8786 // The 4-byte load must be aligned, while a char or short may be 8787 // anywhere in the word. Hence all this nasty bookkeeping code. 8788 // add ptr1, ptrA, ptrB [copy if ptrA==0] 8789 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 8790 // xori shift, shift1, 24 [16] 8791 // rlwinm ptr, ptr1, 0, 0, 29 8792 // slw incr2, incr, shift 8793 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 8794 // slw mask, mask2, shift 8795 // loopMBB: 8796 // lwarx tmpDest, ptr 8797 // add tmp, tmpDest, incr2 8798 // andc tmp2, tmpDest, mask 8799 // and tmp3, tmp, mask 8800 // or tmp4, tmp3, tmp2 8801 // stwcx. tmp4, ptr 8802 // bne- loopMBB 8803 // fallthrough --> exitMBB 8804 // srw dest, tmpDest, shift 8805 if (ptrA != ZeroReg) { 8806 Ptr1Reg = RegInfo.createVirtualRegister(RC); 8807 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 8808 .addReg(ptrA).addReg(ptrB); 8809 } else { 8810 Ptr1Reg = ptrB; 8811 } 8812 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 8813 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 8814 if (!isLittleEndian) 8815 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 8816 .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); 8817 if (is64bit) 8818 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 8819 .addReg(Ptr1Reg).addImm(0).addImm(61); 8820 else 8821 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 8822 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 8823 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 8824 .addReg(incr).addReg(ShiftReg); 8825 if (is8bit) 8826 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 8827 else { 8828 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 8829 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 8830 } 8831 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 8832 .addReg(Mask2Reg).addReg(ShiftReg); 8833 8834 BB = loopMBB; 8835 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 8836 .addReg(ZeroReg).addReg(PtrReg); 8837 if (BinOpcode) 8838 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 8839 .addReg(Incr2Reg).addReg(TmpDestReg); 8840 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 8841 .addReg(TmpDestReg).addReg(MaskReg); 8842 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 8843 .addReg(TmpReg).addReg(MaskReg); 8844 if (CmpOpcode) { 8845 // For unsigned comparisons, we can directly compare the shifted values. 8846 // For signed comparisons we shift and sign extend. 8847 unsigned SReg = RegInfo.createVirtualRegister(RC); 8848 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg) 8849 .addReg(TmpDestReg).addReg(MaskReg); 8850 unsigned ValueReg = SReg; 8851 unsigned CmpReg = Incr2Reg; 8852 if (CmpOpcode == PPC::CMPW) { 8853 ValueReg = RegInfo.createVirtualRegister(RC); 8854 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) 8855 .addReg(SReg).addReg(ShiftReg); 8856 unsigned ValueSReg = RegInfo.createVirtualRegister(RC); 8857 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) 8858 .addReg(ValueReg); 8859 ValueReg = ValueSReg; 8860 CmpReg = incr; 8861 } 8862 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) 8863 .addReg(CmpReg).addReg(ValueReg); 8864 BuildMI(BB, dl, TII->get(PPC::BCC)) 8865 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); 8866 BB->addSuccessor(loop2MBB); 8867 BB->addSuccessor(exitMBB); 8868 BB = loop2MBB; 8869 } 8870 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 8871 .addReg(Tmp3Reg).addReg(Tmp2Reg); 8872 BuildMI(BB, dl, TII->get(PPC::STWCX)) 8873 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 8874 BuildMI(BB, dl, TII->get(PPC::BCC)) 8875 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 8876 BB->addSuccessor(loopMBB); 8877 BB->addSuccessor(exitMBB); 8878 8879 // exitMBB: 8880 // ... 
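// The srw inserted at the start of exitMBB below shifts the originally
// loaded word right so that the old byte/halfword value ends up in the
// low-order bits of 'dest', which is the value the atomic operation returns.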
8881 BB = exitMBB; 8882 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 8883 .addReg(ShiftReg); 8884 return BB; 8885 } 8886 8887 llvm::MachineBasicBlock * 8888 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, 8889 MachineBasicBlock *MBB) const { 8890 DebugLoc DL = MI.getDebugLoc(); 8891 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 8892 8893 MachineFunction *MF = MBB->getParent(); 8894 MachineRegisterInfo &MRI = MF->getRegInfo(); 8895 8896 const BasicBlock *BB = MBB->getBasicBlock(); 8897 MachineFunction::iterator I = ++MBB->getIterator(); 8898 8899 // Memory Reference 8900 MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); 8901 MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); 8902 8903 unsigned DstReg = MI.getOperand(0).getReg(); 8904 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 8905 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 8906 unsigned mainDstReg = MRI.createVirtualRegister(RC); 8907 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 8908 8909 MVT PVT = getPointerTy(MF->getDataLayout()); 8910 assert((PVT == MVT::i64 || PVT == MVT::i32) && 8911 "Invalid Pointer Size!"); 8912 // For v = setjmp(buf), we generate 8913 // 8914 // thisMBB: 8915 // SjLjSetup mainMBB 8916 // bl mainMBB 8917 // v_restore = 1 8918 // b sinkMBB 8919 // 8920 // mainMBB: 8921 // buf[LabelOffset] = LR 8922 // v_main = 0 8923 // 8924 // sinkMBB: 8925 // v = phi(main, restore) 8926 // 8927 8928 MachineBasicBlock *thisMBB = MBB; 8929 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 8930 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 8931 MF->insert(I, mainMBB); 8932 MF->insert(I, sinkMBB); 8933 8934 MachineInstrBuilder MIB; 8935 8936 // Transfer the remainder of BB and its successor edges to sinkMBB. 8937 sinkMBB->splice(sinkMBB->begin(), MBB, 8938 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 8939 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 8940 8941 // Note that the structure of the jmp_buf used here is not compatible 8942 // with that used by libc, and is not designed to be. Specifically, it 8943 // stores only those 'reserved' registers that LLVM does not otherwise 8944 // understand how to spill. Also, by convention, by the time this 8945 // intrinsic is called, Clang has already stored the frame address in the 8946 // first slot of the buffer and stack address in the third. Following the 8947 // X86 target code, we'll store the jump address in the second slot. We also 8948 // need to save the TOC pointer (R2) to handle jumps between shared 8949 // libraries, and that will be stored in the fourth slot. The thread 8950 // identifier (R13) is not affected. 8951 8952 // thisMBB: 8953 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 8954 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 8955 const int64_t BPOffset = 4 * PVT.getStoreSize(); 8956 8957 // Prepare IP either in reg. 8958 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 8959 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 8960 unsigned BufReg = MI.getOperand(1).getReg(); 8961 8962 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { 8963 setUsesTOCBasePtr(*MBB->getParent()); 8964 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 8965 .addReg(PPC::X2) 8966 .addImm(TOCOffset) 8967 .addReg(BufReg); 8968 MIB.setMemRefs(MMOBegin, MMOEnd); 8969 } 8970 8971 // Naked functions never have a base pointer, and so we use r1. For all 8972 // other functions, this decision must be delayed until during PEI. 
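// Note: PPC::BP / PPC::BP8 used below act as placeholders that are expected
// to be resolved to the actual base-pointer register (or to r1) once frame
// lowering has made that decision.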
8973 unsigned BaseReg; 8974 if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) 8975 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; 8976 else 8977 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; 8978 8979 MIB = BuildMI(*thisMBB, MI, DL, 8980 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) 8981 .addReg(BaseReg) 8982 .addImm(BPOffset) 8983 .addReg(BufReg); 8984 MIB.setMemRefs(MMOBegin, MMOEnd); 8985 8986 // Setup 8987 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 8988 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 8989 MIB.addRegMask(TRI->getNoPreservedMask()); 8990 8991 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 8992 8993 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 8994 .addMBB(mainMBB); 8995 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 8996 8997 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); 8998 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); 8999 9000 // mainMBB: 9001 // mainDstReg = 0 9002 MIB = 9003 BuildMI(mainMBB, DL, 9004 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); 9005 9006 // Store IP 9007 if (Subtarget.isPPC64()) { 9008 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 9009 .addReg(LabelReg) 9010 .addImm(LabelOffset) 9011 .addReg(BufReg); 9012 } else { 9013 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 9014 .addReg(LabelReg) 9015 .addImm(LabelOffset) 9016 .addReg(BufReg); 9017 } 9018 9019 MIB.setMemRefs(MMOBegin, MMOEnd); 9020 9021 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 9022 mainMBB->addSuccessor(sinkMBB); 9023 9024 // sinkMBB: 9025 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 9026 TII->get(PPC::PHI), DstReg) 9027 .addReg(mainDstReg).addMBB(mainMBB) 9028 .addReg(restoreDstReg).addMBB(thisMBB); 9029 9030 MI.eraseFromParent(); 9031 return sinkMBB; 9032 } 9033 9034 MachineBasicBlock * 9035 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, 9036 MachineBasicBlock *MBB) const { 9037 DebugLoc DL = MI.getDebugLoc(); 9038 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 9039 9040 MachineFunction *MF = MBB->getParent(); 9041 MachineRegisterInfo &MRI = MF->getRegInfo(); 9042 9043 // Memory Reference 9044 MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); 9045 MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); 9046 9047 MVT PVT = getPointerTy(MF->getDataLayout()); 9048 assert((PVT == MVT::i64 || PVT == MVT::i32) && 9049 "Invalid Pointer Size!"); 9050 9051 const TargetRegisterClass *RC = 9052 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 9053 unsigned Tmp = MRI.createVirtualRegister(RC); 9054 // Since FP is only updated here but NOT referenced, it's treated as GPR. 9055 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 9056 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 9057 unsigned BP = 9058 (PVT == MVT::i64) 9059 ? PPC::X30 9060 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 9061 : PPC::R30); 9062 9063 MachineInstrBuilder MIB; 9064 9065 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 9066 const int64_t SPOffset = 2 * PVT.getStoreSize(); 9067 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 9068 const int64_t BPOffset = 4 * PVT.getStoreSize(); 9069 9070 unsigned BufReg = MI.getOperand(0).getReg(); 9071 9072 // Reload FP (the jumped-to function may not have had a 9073 // frame pointer, and if so, then its r31 will be restored 9074 // as necessary). 
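// The reloads below mirror the buffer layout used by emitEHSjLjSetJmp and the
// front end: slot 0 holds the frame pointer, slot 1 (LabelOffset) the saved
// return address, slot 2 (SPOffset) the stack pointer, slot 3 (TOCOffset) the
// TOC pointer (R2), and slot 4 (BPOffset) the base pointer, each slot being
// PVT.getStoreSize() bytes wide.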
9075 if (PVT == MVT::i64) { 9076 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 9077 .addImm(0) 9078 .addReg(BufReg); 9079 } else { 9080 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 9081 .addImm(0) 9082 .addReg(BufReg); 9083 } 9084 MIB.setMemRefs(MMOBegin, MMOEnd); 9085 9086 // Reload IP 9087 if (PVT == MVT::i64) { 9088 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 9089 .addImm(LabelOffset) 9090 .addReg(BufReg); 9091 } else { 9092 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 9093 .addImm(LabelOffset) 9094 .addReg(BufReg); 9095 } 9096 MIB.setMemRefs(MMOBegin, MMOEnd); 9097 9098 // Reload SP 9099 if (PVT == MVT::i64) { 9100 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 9101 .addImm(SPOffset) 9102 .addReg(BufReg); 9103 } else { 9104 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 9105 .addImm(SPOffset) 9106 .addReg(BufReg); 9107 } 9108 MIB.setMemRefs(MMOBegin, MMOEnd); 9109 9110 // Reload BP 9111 if (PVT == MVT::i64) { 9112 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 9113 .addImm(BPOffset) 9114 .addReg(BufReg); 9115 } else { 9116 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 9117 .addImm(BPOffset) 9118 .addReg(BufReg); 9119 } 9120 MIB.setMemRefs(MMOBegin, MMOEnd); 9121 9122 // Reload TOC 9123 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 9124 setUsesTOCBasePtr(*MBB->getParent()); 9125 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 9126 .addImm(TOCOffset) 9127 .addReg(BufReg); 9128 9129 MIB.setMemRefs(MMOBegin, MMOEnd); 9130 } 9131 9132 // Jump 9133 BuildMI(*MBB, MI, DL, 9134 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 9135 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 9136 9137 MI.eraseFromParent(); 9138 return MBB; 9139 } 9140 9141 MachineBasicBlock * 9142 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 9143 MachineBasicBlock *BB) const { 9144 if (MI.getOpcode() == TargetOpcode::STACKMAP || 9145 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 9146 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && 9147 MI.getOpcode() == TargetOpcode::PATCHPOINT) { 9148 // Call lowering should have added an r2 operand to indicate a dependence 9149 // on the TOC base pointer value. It can't however, because there is no 9150 // way to mark the dependence as implicit there, and so the stackmap code 9151 // will confuse it with a regular operand. Instead, add the dependence 9152 // here. 9153 setUsesTOCBasePtr(*BB->getParent()); 9154 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); 9155 } 9156 9157 return emitPatchPoint(MI, BB); 9158 } 9159 9160 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || 9161 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { 9162 return emitEHSjLjSetJmp(MI, BB); 9163 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || 9164 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { 9165 return emitEHSjLjLongJmp(MI, BB); 9166 } 9167 9168 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 9169 9170 // To "insert" these instructions we actually have to insert their 9171 // control-flow patterns. 
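// The cases below fall into a few groups: the integer/FP/vector SELECT
// pseudos are expanded into a branch-and-phi diamond, ReadTB is expanded into
// a retry loop, the ATOMIC_* pseudos are expanded into larx/stcx. loops, and
// a handful of special pseudos (FADDrtz, the ANDIo bit extracts, TCHECK_RET)
// are handled individually at the end.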
9172 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 9173 MachineFunction::iterator It = ++BB->getIterator(); 9174 9175 MachineFunction *F = BB->getParent(); 9176 9177 if (Subtarget.hasISEL() && 9178 (MI.getOpcode() == PPC::SELECT_CC_I4 || 9179 MI.getOpcode() == PPC::SELECT_CC_I8 || 9180 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) { 9181 SmallVector<MachineOperand, 2> Cond; 9182 if (MI.getOpcode() == PPC::SELECT_CC_I4 || 9183 MI.getOpcode() == PPC::SELECT_CC_I8) 9184 Cond.push_back(MI.getOperand(4)); 9185 else 9186 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 9187 Cond.push_back(MI.getOperand(1)); 9188 9189 DebugLoc dl = MI.getDebugLoc(); 9190 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, 9191 MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); 9192 } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || 9193 MI.getOpcode() == PPC::SELECT_CC_I8 || 9194 MI.getOpcode() == PPC::SELECT_CC_F4 || 9195 MI.getOpcode() == PPC::SELECT_CC_F8 || 9196 MI.getOpcode() == PPC::SELECT_CC_QFRC || 9197 MI.getOpcode() == PPC::SELECT_CC_QSRC || 9198 MI.getOpcode() == PPC::SELECT_CC_QBRC || 9199 MI.getOpcode() == PPC::SELECT_CC_VRRC || 9200 MI.getOpcode() == PPC::SELECT_CC_VSFRC || 9201 MI.getOpcode() == PPC::SELECT_CC_VSSRC || 9202 MI.getOpcode() == PPC::SELECT_CC_VSRC || 9203 MI.getOpcode() == PPC::SELECT_I4 || 9204 MI.getOpcode() == PPC::SELECT_I8 || 9205 MI.getOpcode() == PPC::SELECT_F4 || 9206 MI.getOpcode() == PPC::SELECT_F8 || 9207 MI.getOpcode() == PPC::SELECT_QFRC || 9208 MI.getOpcode() == PPC::SELECT_QSRC || 9209 MI.getOpcode() == PPC::SELECT_QBRC || 9210 MI.getOpcode() == PPC::SELECT_VRRC || 9211 MI.getOpcode() == PPC::SELECT_VSFRC || 9212 MI.getOpcode() == PPC::SELECT_VSSRC || 9213 MI.getOpcode() == PPC::SELECT_VSRC) { 9214 // The incoming instruction knows the destination vreg to set, the 9215 // condition code register to branch on, the true/false values to 9216 // select between, and a branch opcode to use. 9217 9218 // thisMBB: 9219 // ... 9220 // TrueVal = ... 9221 // cmpTY ccX, r1, r2 9222 // bCC copy1MBB 9223 // fallthrough --> copy0MBB 9224 MachineBasicBlock *thisMBB = BB; 9225 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 9226 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 9227 DebugLoc dl = MI.getDebugLoc(); 9228 F->insert(It, copy0MBB); 9229 F->insert(It, sinkMBB); 9230 9231 // Transfer the remainder of BB and its successor edges to sinkMBB. 9232 sinkMBB->splice(sinkMBB->begin(), BB, 9233 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9234 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 9235 9236 // Next, add the true and fallthrough blocks as its successors. 
9237 BB->addSuccessor(copy0MBB); 9238 BB->addSuccessor(sinkMBB); 9239 9240 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || 9241 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || 9242 MI.getOpcode() == PPC::SELECT_QFRC || 9243 MI.getOpcode() == PPC::SELECT_QSRC || 9244 MI.getOpcode() == PPC::SELECT_QBRC || 9245 MI.getOpcode() == PPC::SELECT_VRRC || 9246 MI.getOpcode() == PPC::SELECT_VSFRC || 9247 MI.getOpcode() == PPC::SELECT_VSSRC || 9248 MI.getOpcode() == PPC::SELECT_VSRC) { 9249 BuildMI(BB, dl, TII->get(PPC::BC)) 9250 .addReg(MI.getOperand(1).getReg()) 9251 .addMBB(sinkMBB); 9252 } else { 9253 unsigned SelectPred = MI.getOperand(4).getImm(); 9254 BuildMI(BB, dl, TII->get(PPC::BCC)) 9255 .addImm(SelectPred) 9256 .addReg(MI.getOperand(1).getReg()) 9257 .addMBB(sinkMBB); 9258 } 9259 9260 // copy0MBB: 9261 // %FalseValue = ... 9262 // # fallthrough to sinkMBB 9263 BB = copy0MBB; 9264 9265 // Update machine-CFG edges 9266 BB->addSuccessor(sinkMBB); 9267 9268 // sinkMBB: 9269 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 9270 // ... 9271 BB = sinkMBB; 9272 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) 9273 .addReg(MI.getOperand(3).getReg()) 9274 .addMBB(copy0MBB) 9275 .addReg(MI.getOperand(2).getReg()) 9276 .addMBB(thisMBB); 9277 } else if (MI.getOpcode() == PPC::ReadTB) { 9278 // To read the 64-bit time-base register on a 32-bit target, we read the 9279 // two halves. Should the counter have wrapped while it was being read, we 9280 // need to try again. 9281 // ... 9282 // readLoop: 9283 // mfspr Rx,TBU # load from TBU 9284 // mfspr Ry,TB # load from TB 9285 // mfspr Rz,TBU # load from TBU 9286 // cmpw crX,Rx,Rz # check if 'old'='new' 9287 // bne readLoop # branch if they're not equal 9288 // ... 9289 9290 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); 9291 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 9292 DebugLoc dl = MI.getDebugLoc(); 9293 F->insert(It, readMBB); 9294 F->insert(It, sinkMBB); 9295 9296 // Transfer the remainder of BB and its successor edges to sinkMBB. 
9297 sinkMBB->splice(sinkMBB->begin(), BB, 9298 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9299 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 9300 9301 BB->addSuccessor(readMBB); 9302 BB = readMBB; 9303 9304 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9305 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 9306 unsigned LoReg = MI.getOperand(0).getReg(); 9307 unsigned HiReg = MI.getOperand(1).getReg(); 9308 9309 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); 9310 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); 9311 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); 9312 9313 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 9314 9315 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) 9316 .addReg(HiReg).addReg(ReadAgainReg); 9317 BuildMI(BB, dl, TII->get(PPC::BCC)) 9318 .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); 9319 9320 BB->addSuccessor(readMBB); 9321 BB->addSuccessor(sinkMBB); 9322 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 9323 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 9324 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 9325 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 9326 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 9327 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); 9328 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 9329 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); 9330 9331 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 9332 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 9333 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 9334 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 9335 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 9336 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); 9337 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 9338 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); 9339 9340 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 9341 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 9342 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 9343 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 9344 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 9345 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); 9346 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 9347 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); 9348 9349 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 9350 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 9351 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 9352 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 9353 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 9354 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); 9355 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 9356 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); 9357 9358 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 9359 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 9360 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 9361 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 9362 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 9363 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); 9364 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 9365 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); 9366 9367 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 9368 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 9369 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 9370 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 9371 else if (MI.getOpcode() == 
PPC::ATOMIC_LOAD_SUB_I32) 9372 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); 9373 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 9374 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); 9375 9376 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8) 9377 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE); 9378 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16) 9379 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE); 9380 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32) 9381 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE); 9382 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64) 9383 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE); 9384 9385 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8) 9386 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE); 9387 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16) 9388 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE); 9389 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32) 9390 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE); 9391 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64) 9392 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE); 9393 9394 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8) 9395 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE); 9396 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16) 9397 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE); 9398 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32) 9399 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE); 9400 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64) 9401 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE); 9402 9403 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8) 9404 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE); 9405 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16) 9406 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE); 9407 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32) 9408 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE); 9409 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64) 9410 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE); 9411 9412 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) 9413 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 9414 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) 9415 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 9416 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) 9417 BB = EmitAtomicBinary(MI, BB, 4, 0); 9418 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) 9419 BB = EmitAtomicBinary(MI, BB, 8, 0); 9420 9421 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 9422 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || 9423 (Subtarget.hasPartwordAtomics() && 9424 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || 9425 (Subtarget.hasPartwordAtomics() && 9426 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { 9427 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 9428 9429 auto LoadMnemonic = PPC::LDARX; 9430 auto StoreMnemonic = PPC::STDCX; 9431 switch (MI.getOpcode()) { 9432 default: 9433 llvm_unreachable("Compare and swap of unknown size"); 9434 case PPC::ATOMIC_CMP_SWAP_I8: 9435 LoadMnemonic = PPC::LBARX; 9436 StoreMnemonic = PPC::STBCX; 9437 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 9438 break; 9439 case PPC::ATOMIC_CMP_SWAP_I16: 9440 LoadMnemonic = PPC::LHARX; 9441 StoreMnemonic = 
PPC::STHCX; 9442 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); 9443 break; 9444 case PPC::ATOMIC_CMP_SWAP_I32: 9445 LoadMnemonic = PPC::LWARX; 9446 StoreMnemonic = PPC::STWCX; 9447 break; 9448 case PPC::ATOMIC_CMP_SWAP_I64: 9449 LoadMnemonic = PPC::LDARX; 9450 StoreMnemonic = PPC::STDCX; 9451 break; 9452 } 9453 unsigned dest = MI.getOperand(0).getReg(); 9454 unsigned ptrA = MI.getOperand(1).getReg(); 9455 unsigned ptrB = MI.getOperand(2).getReg(); 9456 unsigned oldval = MI.getOperand(3).getReg(); 9457 unsigned newval = MI.getOperand(4).getReg(); 9458 DebugLoc dl = MI.getDebugLoc(); 9459 9460 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 9461 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 9462 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 9463 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 9464 F->insert(It, loop1MBB); 9465 F->insert(It, loop2MBB); 9466 F->insert(It, midMBB); 9467 F->insert(It, exitMBB); 9468 exitMBB->splice(exitMBB->begin(), BB, 9469 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9470 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 9471 9472 // thisMBB: 9473 // ... 9474 // fallthrough --> loopMBB 9475 BB->addSuccessor(loop1MBB); 9476 9477 // loop1MBB: 9478 // l[bhwd]arx dest, ptr 9479 // cmp[wd] dest, oldval 9480 // bne- midMBB 9481 // loop2MBB: 9482 // st[bhwd]cx. newval, ptr 9483 // bne- loopMBB 9484 // b exitBB 9485 // midMBB: 9486 // st[bhwd]cx. dest, ptr 9487 // exitBB: 9488 BB = loop1MBB; 9489 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) 9490 .addReg(ptrA).addReg(ptrB); 9491 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) 9492 .addReg(oldval).addReg(dest); 9493 BuildMI(BB, dl, TII->get(PPC::BCC)) 9494 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 9495 BB->addSuccessor(loop2MBB); 9496 BB->addSuccessor(midMBB); 9497 9498 BB = loop2MBB; 9499 BuildMI(BB, dl, TII->get(StoreMnemonic)) 9500 .addReg(newval).addReg(ptrA).addReg(ptrB); 9501 BuildMI(BB, dl, TII->get(PPC::BCC)) 9502 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 9503 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 9504 BB->addSuccessor(loop1MBB); 9505 BB->addSuccessor(exitMBB); 9506 9507 BB = midMBB; 9508 BuildMI(BB, dl, TII->get(StoreMnemonic)) 9509 .addReg(dest).addReg(ptrA).addReg(ptrB); 9510 BB->addSuccessor(exitMBB); 9511 9512 // exitMBB: 9513 // ... 9514 BB = exitMBB; 9515 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 9516 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 9517 // We must use 64-bit registers for addresses when targeting 64-bit, 9518 // since we're actually doing arithmetic on them. Other registers 9519 // can be 32-bit. 
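// This block is the fallback expansion of 8-bit and 16-bit compare-and-swap
// for subtargets without lbarx/lharx: it operates on the containing aligned
// word with lwarx/stwcx. and isolates the target byte or halfword with shifts
// and masks, mirroring EmitPartwordAtomicBinary above.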
9520 bool is64bit = Subtarget.isPPC64(); 9521 bool isLittleEndian = Subtarget.isLittleEndian(); 9522 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 9523 9524 unsigned dest = MI.getOperand(0).getReg(); 9525 unsigned ptrA = MI.getOperand(1).getReg(); 9526 unsigned ptrB = MI.getOperand(2).getReg(); 9527 unsigned oldval = MI.getOperand(3).getReg(); 9528 unsigned newval = MI.getOperand(4).getReg(); 9529 DebugLoc dl = MI.getDebugLoc(); 9530 9531 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 9532 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 9533 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 9534 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 9535 F->insert(It, loop1MBB); 9536 F->insert(It, loop2MBB); 9537 F->insert(It, midMBB); 9538 F->insert(It, exitMBB); 9539 exitMBB->splice(exitMBB->begin(), BB, 9540 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 9541 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 9542 9543 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9544 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 9545 : &PPC::GPRCRegClass; 9546 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 9547 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 9548 unsigned ShiftReg = 9549 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); 9550 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 9551 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 9552 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 9553 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 9554 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 9555 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 9556 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 9557 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 9558 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 9559 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 9560 unsigned Ptr1Reg; 9561 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 9562 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 9563 // thisMBB: 9564 // ... 9565 // fallthrough --> loopMBB 9566 BB->addSuccessor(loop1MBB); 9567 9568 // The 4-byte load must be aligned, while a char or short may be 9569 // anywhere in the word. Hence all this nasty bookkeeping code. 9570 // add ptr1, ptrA, ptrB [copy if ptrA==0] 9571 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 9572 // xori shift, shift1, 24 [16] 9573 // rlwinm ptr, ptr1, 0, 0, 29 9574 // slw newval2, newval, shift 9575 // slw oldval2, oldval,shift 9576 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 9577 // slw mask, mask2, shift 9578 // and newval3, newval2, mask 9579 // and oldval3, oldval2, mask 9580 // loop1MBB: 9581 // lwarx tmpDest, ptr 9582 // and tmp, tmpDest, mask 9583 // cmpw tmp, oldval3 9584 // bne- midMBB 9585 // loop2MBB: 9586 // andc tmp2, tmpDest, mask 9587 // or tmp4, tmp2, newval3 9588 // stwcx. tmp4, ptr 9589 // bne- loop1MBB 9590 // b exitBB 9591 // midMBB: 9592 // stwcx. tmpDest, ptr 9593 // exitBB: 9594 // srw dest, tmpDest, shift 9595 if (ptrA != ZeroReg) { 9596 Ptr1Reg = RegInfo.createVirtualRegister(RC); 9597 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 9598 .addReg(ptrA).addReg(ptrB); 9599 } else { 9600 Ptr1Reg = ptrB; 9601 } 9602 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 9603 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 9604 if (!isLittleEndian) 9605 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::XORI8 : PPC::XORI), ShiftReg) 9606 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 9607 if (is64bit) 9608 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 9609 .addReg(Ptr1Reg).addImm(0).addImm(61); 9610 else 9611 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 9612 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 9613 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 9614 .addReg(newval).addReg(ShiftReg); 9615 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 9616 .addReg(oldval).addReg(ShiftReg); 9617 if (is8bit) 9618 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 9619 else { 9620 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 9621 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 9622 .addReg(Mask3Reg).addImm(65535); 9623 } 9624 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 9625 .addReg(Mask2Reg).addReg(ShiftReg); 9626 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 9627 .addReg(NewVal2Reg).addReg(MaskReg); 9628 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 9629 .addReg(OldVal2Reg).addReg(MaskReg); 9630 9631 BB = loop1MBB; 9632 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 9633 .addReg(ZeroReg).addReg(PtrReg); 9634 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 9635 .addReg(TmpDestReg).addReg(MaskReg); 9636 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 9637 .addReg(TmpReg).addReg(OldVal3Reg); 9638 BuildMI(BB, dl, TII->get(PPC::BCC)) 9639 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 9640 BB->addSuccessor(loop2MBB); 9641 BB->addSuccessor(midMBB); 9642 9643 BB = loop2MBB; 9644 BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) 9645 .addReg(TmpDestReg).addReg(MaskReg); 9646 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 9647 .addReg(Tmp2Reg).addReg(NewVal3Reg); 9648 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 9649 .addReg(ZeroReg).addReg(PtrReg); 9650 BuildMI(BB, dl, TII->get(PPC::BCC)) 9651 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 9652 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 9653 BB->addSuccessor(loop1MBB); 9654 BB->addSuccessor(exitMBB); 9655 9656 BB = midMBB; 9657 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 9658 .addReg(ZeroReg).addReg(PtrReg); 9659 BB->addSuccessor(exitMBB); 9660 9661 // exitMBB: 9662 // ... 9663 BB = exitMBB; 9664 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 9665 .addReg(ShiftReg); 9666 } else if (MI.getOpcode() == PPC::FADDrtz) { 9667 // This pseudo performs an FADD with rounding mode temporarily forced 9668 // to round-to-zero. We emit this via custom inserter since the FPSCR 9669 // is not modeled at the SelectionDAG level. 9670 unsigned Dest = MI.getOperand(0).getReg(); 9671 unsigned Src1 = MI.getOperand(1).getReg(); 9672 unsigned Src2 = MI.getOperand(2).getReg(); 9673 DebugLoc dl = MI.getDebugLoc(); 9674 9675 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9676 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 9677 9678 // Save FPSCR value. 9679 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 9680 9681 // Set rounding mode to round-to-zero. 9682 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 9683 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 9684 9685 // Perform addition. 9686 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 9687 9688 // Restore FPSCR value. 
9689 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); 9690 } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || 9691 MI.getOpcode() == PPC::ANDIo_1_GT_BIT || 9692 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 9693 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { 9694 unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 9695 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) 9696 ? PPC::ANDIo8 9697 : PPC::ANDIo; 9698 bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || 9699 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); 9700 9701 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9702 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 9703 &PPC::GPRCRegClass : 9704 &PPC::G8RCRegClass); 9705 9706 DebugLoc dl = MI.getDebugLoc(); 9707 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 9708 .addReg(MI.getOperand(1).getReg()) 9709 .addImm(1); 9710 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 9711 MI.getOperand(0).getReg()) 9712 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 9713 } else if (MI.getOpcode() == PPC::TCHECK_RET) { 9714 DebugLoc Dl = MI.getDebugLoc(); 9715 MachineRegisterInfo &RegInfo = F->getRegInfo(); 9716 unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); 9717 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); 9718 return BB; 9719 } else { 9720 llvm_unreachable("Unexpected instr type to insert"); 9721 } 9722 9723 MI.eraseFromParent(); // The pseudo instruction is gone now. 9724 return BB; 9725 } 9726 9727 //===----------------------------------------------------------------------===// 9728 // Target Optimization Hooks 9729 //===----------------------------------------------------------------------===// 9730 9731 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { 9732 // For the estimates, convergence is quadratic, so we essentially double the 9733 // number of digits correct after every iteration. For both FRE and FRSQRTE, 9734 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), 9735 // this is 2^-14. IEEE float has 23 digits and double has 52 digits. 9736 int RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3;
  if (VT.getScalarType() == MVT::f64)
    RefinementSteps++;
  return RefinementSteps;
}

SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

    UseOneConstNR = true;
    return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}

SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
    return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}

unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
  // enabled for division), this functionality is redundant with the default
  // combiner logic (once the division -> reciprocal/multiply transformation
  // has taken place). As a result, this matters more for older cores than for
  // newer ones.

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
  switch (Subtarget.getDarwinDirective()) {
  default:
    return 3;
  case PPC::DIR_440:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    return 2;
  }
}

// isConsecutiveLSLoc needs to work even if all adds have not yet been
// collapsed, and so we need to look through chains of them.
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
                                      int64_t &Offset, SelectionDAG &DAG) {
  if (DAG.isBaseWithConstantOffset(Loc)) {
    Base = Loc.getOperand(0);
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();

    // The base might itself be a base plus an offset, and if so, accumulate
    // that as well.
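    // The recursion terminates because each step descends one operand deeper
    // into the address computation.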
9811 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); 9812 } 9813 } 9814 9815 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 9816 unsigned Bytes, int Dist, 9817 SelectionDAG &DAG) { 9818 if (VT.getSizeInBits() / 8 != Bytes) 9819 return false; 9820 9821 SDValue BaseLoc = Base->getBasePtr(); 9822 if (Loc.getOpcode() == ISD::FrameIndex) { 9823 if (BaseLoc.getOpcode() != ISD::FrameIndex) 9824 return false; 9825 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 9826 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 9827 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 9828 int FS = MFI.getObjectSize(FI); 9829 int BFS = MFI.getObjectSize(BFI); 9830 if (FS != BFS || FS != (int)Bytes) return false; 9831 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); 9832 } 9833 9834 SDValue Base1 = Loc, Base2 = BaseLoc; 9835 int64_t Offset1 = 0, Offset2 = 0; 9836 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); 9837 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); 9838 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) 9839 return true; 9840 9841 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9842 const GlobalValue *GV1 = nullptr; 9843 const GlobalValue *GV2 = nullptr; 9844 Offset1 = 0; 9845 Offset2 = 0; 9846 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 9847 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 9848 if (isGA1 && isGA2 && GV1 == GV2) 9849 return Offset1 == (Offset2 + Dist*Bytes); 9850 return false; 9851 } 9852 9853 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 9854 // not enforce equality of the chain operands. 9855 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, 9856 unsigned Bytes, int Dist, 9857 SelectionDAG &DAG) { 9858 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { 9859 EVT VT = LS->getMemoryVT(); 9860 SDValue Loc = LS->getBasePtr(); 9861 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); 9862 } 9863 9864 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 9865 EVT VT; 9866 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9867 default: return false; 9868 case Intrinsic::ppc_qpx_qvlfd: 9869 case Intrinsic::ppc_qpx_qvlfda: 9870 VT = MVT::v4f64; 9871 break; 9872 case Intrinsic::ppc_qpx_qvlfs: 9873 case Intrinsic::ppc_qpx_qvlfsa: 9874 VT = MVT::v4f32; 9875 break; 9876 case Intrinsic::ppc_qpx_qvlfcd: 9877 case Intrinsic::ppc_qpx_qvlfcda: 9878 VT = MVT::v2f64; 9879 break; 9880 case Intrinsic::ppc_qpx_qvlfcs: 9881 case Intrinsic::ppc_qpx_qvlfcsa: 9882 VT = MVT::v2f32; 9883 break; 9884 case Intrinsic::ppc_qpx_qvlfiwa: 9885 case Intrinsic::ppc_qpx_qvlfiwz: 9886 case Intrinsic::ppc_altivec_lvx: 9887 case Intrinsic::ppc_altivec_lvxl: 9888 case Intrinsic::ppc_vsx_lxvw4x: 9889 case Intrinsic::ppc_vsx_lxvw4x_be: 9890 VT = MVT::v4i32; 9891 break; 9892 case Intrinsic::ppc_vsx_lxvd2x: 9893 case Intrinsic::ppc_vsx_lxvd2x_be: 9894 VT = MVT::v2f64; 9895 break; 9896 case Intrinsic::ppc_altivec_lvebx: 9897 VT = MVT::i8; 9898 break; 9899 case Intrinsic::ppc_altivec_lvehx: 9900 VT = MVT::i16; 9901 break; 9902 case Intrinsic::ppc_altivec_lvewx: 9903 VT = MVT::i32; 9904 break; 9905 } 9906 9907 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); 9908 } 9909 9910 if (N->getOpcode() == ISD::INTRINSIC_VOID) { 9911 EVT VT; 9912 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 9913 default: return false; 9914 case Intrinsic::ppc_qpx_qvstfd: 9915 case 
Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}

// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done; otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look
  // through all loads (just the chain uses) and token factors to find a
  // consecutive load.
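  // Both phases visit each node at most once (enforced by the 'Visited'
  // sets), so the amount of work is linear in the number of chain nodes
  // examined.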
10003 Visited.clear(); 10004 Queue.clear(); 10005 10006 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 10007 IE = LoadRoots.end(); I != IE; ++I) { 10008 Queue.push_back(*I); 10009 10010 while (!Queue.empty()) { 10011 SDNode *LoadRoot = Queue.pop_back_val(); 10012 if (!Visited.insert(LoadRoot).second) 10013 continue; 10014 10015 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) 10016 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 10017 return true; 10018 10019 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 10020 UE = LoadRoot->use_end(); UI != UE; ++UI) 10021 if (((isa<MemSDNode>(*UI) && 10022 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || 10023 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 10024 Queue.push_back(*UI); 10025 } 10026 } 10027 10028 return false; 10029 } 10030 10031 10032 /// This function is called when we have proved that a SETCC node can be replaced 10033 /// by subtraction (and other supporting instructions) so that the result of 10034 /// comparison is kept in a GPR instead of CR. This function is purely for 10035 /// codegen purposes and has some flags to guide the codegen process. 10036 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, 10037 bool Swap, SDLoc &DL, SelectionDAG &DAG) { 10038 10039 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); 10040 10041 // Zero extend the operands to the largest legal integer. Originally, they 10042 // must be of a strictly smaller size. 10043 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0), 10044 DAG.getConstant(Size, DL, MVT::i32)); 10045 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1), 10046 DAG.getConstant(Size, DL, MVT::i32)); 10047 10048 // Swap if needed. Depends on the condition code. 10049 if (Swap) 10050 std::swap(Op0, Op1); 10051 10052 // Subtract extended integers. 10053 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1); 10054 10055 // Move the sign bit to the least significant position and zero out the rest. 10056 // Now the least significant bit carries the result of original comparison. 10057 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode, 10058 DAG.getConstant(Size - 1, DL, MVT::i32)); 10059 auto Final = Shifted; 10060 10061 // Complement the result if needed. Based on the condition code. 10062 if (Complement) 10063 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted, 10064 DAG.getConstant(1, DL, MVT::i64)); 10065 10066 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final); 10067 } 10068 10069 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N, 10070 DAGCombinerInfo &DCI) const { 10071 10072 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); 10073 10074 SelectionDAG &DAG = DCI.DAG; 10075 SDLoc DL(N); 10076 10077 // Size of integers being compared has a critical role in the following 10078 // analysis, so we prefer to do this when all types are legal. 
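  // For example, on a 64-bit subtarget an i32 unsigned comparison whose only
  // users are zero-extensions can be rewritten below as a 64-bit subtract
  // followed by a shift that isolates the sign bit.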
10079 if (!DCI.isAfterLegalizeVectorOps()) 10080 return SDValue(); 10081 10082 // If all users of SETCC extend its value to a legal integer type 10083 // then we replace SETCC with a subtraction 10084 for (SDNode::use_iterator UI = N->use_begin(), 10085 UE = N->use_end(); UI != UE; ++UI) { 10086 if (UI->getOpcode() != ISD::ZERO_EXTEND) 10087 return SDValue(); 10088 } 10089 10090 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 10091 auto OpSize = N->getOperand(0).getValueSizeInBits(); 10092 10093 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits(); 10094 10095 if (OpSize < Size) { 10096 switch (CC) { 10097 default: break; 10098 case ISD::SETULT: 10099 return generateEquivalentSub(N, Size, false, false, DL, DAG); 10100 case ISD::SETULE: 10101 return generateEquivalentSub(N, Size, true, true, DL, DAG); 10102 case ISD::SETUGT: 10103 return generateEquivalentSub(N, Size, false, true, DL, DAG); 10104 case ISD::SETUGE: 10105 return generateEquivalentSub(N, Size, true, false, DL, DAG); 10106 } 10107 } 10108 10109 return SDValue(); 10110 } 10111 10112 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 10113 DAGCombinerInfo &DCI) const { 10114 SelectionDAG &DAG = DCI.DAG; 10115 SDLoc dl(N); 10116 10117 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); 10118 // If we're tracking CR bits, we need to be careful that we don't have: 10119 // trunc(binary-ops(zext(x), zext(y))) 10120 // or 10121 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 10122 // such that we're unnecessarily moving things into GPRs when it would be 10123 // better to keep them in CR bits. 10124 10125 // Note that trunc here can be an actual i1 trunc, or can be the effective 10126 // truncation that comes from a setcc or select_cc. 10127 if (N->getOpcode() == ISD::TRUNCATE && 10128 N->getValueType(0) != MVT::i1) 10129 return SDValue(); 10130 10131 if (N->getOperand(0).getValueType() != MVT::i32 && 10132 N->getOperand(0).getValueType() != MVT::i64) 10133 return SDValue(); 10134 10135 if (N->getOpcode() == ISD::SETCC || 10136 N->getOpcode() == ISD::SELECT_CC) { 10137 // If we're looking at a comparison, then we need to make sure that the 10138 // high bits (all except for the first) don't matter the result. 10139 ISD::CondCode CC = 10140 cast<CondCodeSDNode>(N->getOperand( 10141 N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); 10142 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 10143 10144 if (ISD::isSignedIntSetCC(CC)) { 10145 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 10146 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 10147 return SDValue(); 10148 } else if (ISD::isUnsignedIntSetCC(CC)) { 10149 if (!DAG.MaskedValueIsZero(N->getOperand(0), 10150 APInt::getHighBitsSet(OpBits, OpBits-1)) || 10151 !DAG.MaskedValueIsZero(N->getOperand(1), 10152 APInt::getHighBitsSet(OpBits, OpBits-1))) 10153 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI) 10154 : SDValue()); 10155 } else { 10156 // This is neither a signed nor an unsigned comparison, just make sure 10157 // that the high bits are equal. 10158 APInt Op1Zero, Op1One; 10159 APInt Op2Zero, Op2One; 10160 DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); 10161 DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); 10162 10163 // We don't really care about what is known about the first bit (if 10164 // anything), so clear it in all masks prior to comparing them. 
10165 Op1Zero.clearBit(0); Op1One.clearBit(0); 10166 Op2Zero.clearBit(0); Op2One.clearBit(0); 10167 10168 if (Op1Zero != Op2Zero || Op1One != Op2One) 10169 return SDValue(); 10170 } 10171 } 10172 10173 // We now know that the higher-order bits are irrelevant, we just need to 10174 // make sure that all of the intermediate operations are bit operations, and 10175 // all inputs are extensions. 10176 if (N->getOperand(0).getOpcode() != ISD::AND && 10177 N->getOperand(0).getOpcode() != ISD::OR && 10178 N->getOperand(0).getOpcode() != ISD::XOR && 10179 N->getOperand(0).getOpcode() != ISD::SELECT && 10180 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 10181 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 10182 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 10183 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 10184 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 10185 return SDValue(); 10186 10187 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 10188 N->getOperand(1).getOpcode() != ISD::AND && 10189 N->getOperand(1).getOpcode() != ISD::OR && 10190 N->getOperand(1).getOpcode() != ISD::XOR && 10191 N->getOperand(1).getOpcode() != ISD::SELECT && 10192 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 10193 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 10194 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 10195 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 10196 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 10197 return SDValue(); 10198 10199 SmallVector<SDValue, 4> Inputs; 10200 SmallVector<SDValue, 8> BinOps, PromOps; 10201 SmallPtrSet<SDNode *, 16> Visited; 10202 10203 for (unsigned i = 0; i < 2; ++i) { 10204 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 10205 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 10206 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 10207 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 10208 isa<ConstantSDNode>(N->getOperand(i))) 10209 Inputs.push_back(N->getOperand(i)); 10210 else 10211 BinOps.push_back(N->getOperand(i)); 10212 10213 if (N->getOpcode() == ISD::TRUNCATE) 10214 break; 10215 } 10216 10217 // Visit all inputs, collect all binary operations (and, or, xor and 10218 // select) that are all fed by extensions. 10219 while (!BinOps.empty()) { 10220 SDValue BinOp = BinOps.back(); 10221 BinOps.pop_back(); 10222 10223 if (!Visited.insert(BinOp.getNode()).second) 10224 continue; 10225 10226 PromOps.push_back(BinOp); 10227 10228 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 10229 // The condition of the select is not promoted. 
10230 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 10231 continue; 10232 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 10233 continue; 10234 10235 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 10236 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 10237 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 10238 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 10239 isa<ConstantSDNode>(BinOp.getOperand(i))) { 10240 Inputs.push_back(BinOp.getOperand(i)); 10241 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 10242 BinOp.getOperand(i).getOpcode() == ISD::OR || 10243 BinOp.getOperand(i).getOpcode() == ISD::XOR || 10244 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 10245 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 10246 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 10247 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 10248 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 10249 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 10250 BinOps.push_back(BinOp.getOperand(i)); 10251 } else { 10252 // We have an input that is not an extension or another binary 10253 // operation; we'll abort this transformation. 10254 return SDValue(); 10255 } 10256 } 10257 } 10258 10259 // Make sure that this is a self-contained cluster of operations (which 10260 // is not quite the same thing as saying that everything has only one 10261 // use). 10262 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10263 if (isa<ConstantSDNode>(Inputs[i])) 10264 continue; 10265 10266 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 10267 UE = Inputs[i].getNode()->use_end(); 10268 UI != UE; ++UI) { 10269 SDNode *User = *UI; 10270 if (User != N && !Visited.count(User)) 10271 return SDValue(); 10272 10273 // Make sure that we're not going to promote the non-output-value 10274 // operand(s) or SELECT or SELECT_CC. 10275 // FIXME: Although we could sometimes handle this, and it does occur in 10276 // practice that one of the condition inputs to the select is also one of 10277 // the outputs, we currently can't deal with this. 10278 if (User->getOpcode() == ISD::SELECT) { 10279 if (User->getOperand(0) == Inputs[i]) 10280 return SDValue(); 10281 } else if (User->getOpcode() == ISD::SELECT_CC) { 10282 if (User->getOperand(0) == Inputs[i] || 10283 User->getOperand(1) == Inputs[i]) 10284 return SDValue(); 10285 } 10286 } 10287 } 10288 10289 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 10290 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 10291 UE = PromOps[i].getNode()->use_end(); 10292 UI != UE; ++UI) { 10293 SDNode *User = *UI; 10294 if (User != N && !Visited.count(User)) 10295 return SDValue(); 10296 10297 // Make sure that we're not going to promote the non-output-value 10298 // operand(s) or SELECT or SELECT_CC. 10299 // FIXME: Although we could sometimes handle this, and it does occur in 10300 // practice that one of the condition inputs to the select is also one of 10301 // the outputs, we currently can't deal with this. 10302 if (User->getOpcode() == ISD::SELECT) { 10303 if (User->getOperand(0) == PromOps[i]) 10304 return SDValue(); 10305 } else if (User->getOpcode() == ISD::SELECT_CC) { 10306 if (User->getOperand(0) == PromOps[i] || 10307 User->getOperand(1) == PromOps[i]) 10308 return SDValue(); 10309 } 10310 } 10311 } 10312 10313 // Replace all inputs with the extension operand. 
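// Each non-constant input is an extension from i1, so replacing it with its
// operand leaves the cluster operating directly on i1 values; constant
// inputs are handled later, when the individual operations are rebuilt.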
10314 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10315 // Constants may have users outside the cluster of to-be-promoted nodes, 10316 // and so we need to replace those as we do the promotions. 10317 if (isa<ConstantSDNode>(Inputs[i])) 10318 continue; 10319 else 10320 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 10321 } 10322 10323 std::list<HandleSDNode> PromOpHandles; 10324 for (auto &PromOp : PromOps) 10325 PromOpHandles.emplace_back(PromOp); 10326 10327 // Replace all operations (these are all the same, but have a different 10328 // (i1) return type). DAG.getNode will validate that the types of 10329 // a binary operator match, so go through the list in reverse so that 10330 // we've likely promoted both operands first. Any intermediate truncations or 10331 // extensions disappear. 10332 while (!PromOpHandles.empty()) { 10333 SDValue PromOp = PromOpHandles.back().getValue(); 10334 PromOpHandles.pop_back(); 10335 10336 if (PromOp.getOpcode() == ISD::TRUNCATE || 10337 PromOp.getOpcode() == ISD::SIGN_EXTEND || 10338 PromOp.getOpcode() == ISD::ZERO_EXTEND || 10339 PromOp.getOpcode() == ISD::ANY_EXTEND) { 10340 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 10341 PromOp.getOperand(0).getValueType() != MVT::i1) { 10342 // The operand is not yet ready (see comment below). 10343 PromOpHandles.emplace_front(PromOp); 10344 continue; 10345 } 10346 10347 SDValue RepValue = PromOp.getOperand(0); 10348 if (isa<ConstantSDNode>(RepValue)) 10349 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 10350 10351 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 10352 continue; 10353 } 10354 10355 unsigned C; 10356 switch (PromOp.getOpcode()) { 10357 default: C = 0; break; 10358 case ISD::SELECT: C = 1; break; 10359 case ISD::SELECT_CC: C = 2; break; 10360 } 10361 10362 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 10363 PromOp.getOperand(C).getValueType() != MVT::i1) || 10364 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 10365 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 10366 // The to-be-promoted operands of this node have not yet been 10367 // promoted (this should be rare because we're going through the 10368 // list backward, but if one of the operands has several users in 10369 // this cluster of to-be-promoted nodes, it is possible). 10370 PromOpHandles.emplace_front(PromOp); 10371 continue; 10372 } 10373 10374 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 10375 PromOp.getNode()->op_end()); 10376 10377 // If there are any constant inputs, make sure they're replaced now. 10378 for (unsigned i = 0; i < 2; ++i) 10379 if (isa<ConstantSDNode>(Ops[C+i])) 10380 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 10381 10382 DAG.ReplaceAllUsesOfValueWith(PromOp, 10383 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 10384 } 10385 10386 // Now we're left with the initial truncation itself. 10387 if (N->getOpcode() == ISD::TRUNCATE) 10388 return N->getOperand(0); 10389 10390 // Otherwise, this is a comparison. The operands to be compared have just 10391 // changed type (to i1), but everything else is the same. 10392 return SDValue(N, 0); 10393 } 10394 10395 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 10396 DAGCombinerInfo &DCI) const { 10397 SelectionDAG &DAG = DCI.DAG; 10398 SDLoc dl(N); 10399 10400 // If we're tracking CR bits, we need to be careful that we don't have: 10401 // zext(binary-ops(trunc(x), trunc(y))) 10402 // or 10403 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 
10404 // such that we're unnecessarily moving things into CR bits that can more 10405 // efficiently stay in GPRs. Note that if we're not certain that the high 10406 // bits are set as required by the final extension, we still may need to do 10407 // some masking to get the proper behavior. 10408 10409 // This same functionality is important on PPC64 when dealing with 10410 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 10411 // the return values of functions. Because it is so similar, it is handled 10412 // here as well. 10413 10414 if (N->getValueType(0) != MVT::i32 && 10415 N->getValueType(0) != MVT::i64) 10416 return SDValue(); 10417 10418 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || 10419 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) 10420 return SDValue(); 10421 10422 if (N->getOperand(0).getOpcode() != ISD::AND && 10423 N->getOperand(0).getOpcode() != ISD::OR && 10424 N->getOperand(0).getOpcode() != ISD::XOR && 10425 N->getOperand(0).getOpcode() != ISD::SELECT && 10426 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 10427 return SDValue(); 10428 10429 SmallVector<SDValue, 4> Inputs; 10430 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 10431 SmallPtrSet<SDNode *, 16> Visited; 10432 10433 // Visit all inputs, collect all binary operations (and, or, xor and 10434 // select) that are all fed by truncations. 10435 while (!BinOps.empty()) { 10436 SDValue BinOp = BinOps.back(); 10437 BinOps.pop_back(); 10438 10439 if (!Visited.insert(BinOp.getNode()).second) 10440 continue; 10441 10442 PromOps.push_back(BinOp); 10443 10444 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 10445 // The condition of the select is not promoted. 10446 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 10447 continue; 10448 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 10449 continue; 10450 10451 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 10452 isa<ConstantSDNode>(BinOp.getOperand(i))) { 10453 Inputs.push_back(BinOp.getOperand(i)); 10454 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 10455 BinOp.getOperand(i).getOpcode() == ISD::OR || 10456 BinOp.getOperand(i).getOpcode() == ISD::XOR || 10457 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 10458 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 10459 BinOps.push_back(BinOp.getOperand(i)); 10460 } else { 10461 // We have an input that is not a truncation or another binary 10462 // operation; we'll abort this transformation. 10463 return SDValue(); 10464 } 10465 } 10466 } 10467 10468 // The operands of a select that must be truncated when the select is 10469 // promoted because the operand is actually part of the to-be-promoted set. 10470 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 10471 10472 // Make sure that this is a self-contained cluster of operations (which 10473 // is not quite the same thing as saying that everything has only one 10474 // use). 10475 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10476 if (isa<ConstantSDNode>(Inputs[i])) 10477 continue; 10478 10479 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 10480 UE = Inputs[i].getNode()->use_end(); 10481 UI != UE; ++UI) { 10482 SDNode *User = *UI; 10483 if (User != N && !Visited.count(User)) 10484 return SDValue(); 10485 10486 // If we're going to promote the non-output-value operand(s) or SELECT or 10487 // SELECT_CC, record them for truncation. 
10488 if (User->getOpcode() == ISD::SELECT) { 10489 if (User->getOperand(0) == Inputs[i]) 10490 SelectTruncOp[0].insert(std::make_pair(User, 10491 User->getOperand(0).getValueType())); 10492 } else if (User->getOpcode() == ISD::SELECT_CC) { 10493 if (User->getOperand(0) == Inputs[i]) 10494 SelectTruncOp[0].insert(std::make_pair(User, 10495 User->getOperand(0).getValueType())); 10496 if (User->getOperand(1) == Inputs[i]) 10497 SelectTruncOp[1].insert(std::make_pair(User, 10498 User->getOperand(1).getValueType())); 10499 } 10500 } 10501 } 10502 10503 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 10504 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 10505 UE = PromOps[i].getNode()->use_end(); 10506 UI != UE; ++UI) { 10507 SDNode *User = *UI; 10508 if (User != N && !Visited.count(User)) 10509 return SDValue(); 10510 10511 // If we're going to promote the non-output-value operand(s) or SELECT or 10512 // SELECT_CC, record them for truncation. 10513 if (User->getOpcode() == ISD::SELECT) { 10514 if (User->getOperand(0) == PromOps[i]) 10515 SelectTruncOp[0].insert(std::make_pair(User, 10516 User->getOperand(0).getValueType())); 10517 } else if (User->getOpcode() == ISD::SELECT_CC) { 10518 if (User->getOperand(0) == PromOps[i]) 10519 SelectTruncOp[0].insert(std::make_pair(User, 10520 User->getOperand(0).getValueType())); 10521 if (User->getOperand(1) == PromOps[i]) 10522 SelectTruncOp[1].insert(std::make_pair(User, 10523 User->getOperand(1).getValueType())); 10524 } 10525 } 10526 } 10527 10528 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 10529 bool ReallyNeedsExt = false; 10530 if (N->getOpcode() != ISD::ANY_EXTEND) { 10531 // If all of the inputs are not already sign/zero extended, then 10532 // we'll still need to do that at the end. 10533 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10534 if (isa<ConstantSDNode>(Inputs[i])) 10535 continue; 10536 10537 unsigned OpBits = 10538 Inputs[i].getOperand(0).getValueSizeInBits(); 10539 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 10540 10541 if ((N->getOpcode() == ISD::ZERO_EXTEND && 10542 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 10543 APInt::getHighBitsSet(OpBits, 10544 OpBits-PromBits))) || 10545 (N->getOpcode() == ISD::SIGN_EXTEND && 10546 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 10547 (OpBits-(PromBits-1)))) { 10548 ReallyNeedsExt = true; 10549 break; 10550 } 10551 } 10552 } 10553 10554 // Replace all inputs, either with the truncation operand, or a 10555 // truncation or extension to the final output type. 10556 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 10557 // Constant inputs need to be replaced with the to-be-promoted nodes that 10558 // use them because they might have users outside of the cluster of 10559 // promoted nodes. 
10560 if (isa<ConstantSDNode>(Inputs[i])) 10561 continue; 10562 10563 SDValue InSrc = Inputs[i].getOperand(0); 10564 if (Inputs[i].getValueType() == N->getValueType(0)) 10565 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 10566 else if (N->getOpcode() == ISD::SIGN_EXTEND) 10567 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10568 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 10569 else if (N->getOpcode() == ISD::ZERO_EXTEND) 10570 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10571 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 10572 else 10573 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 10574 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 10575 } 10576 10577 std::list<HandleSDNode> PromOpHandles; 10578 for (auto &PromOp : PromOps) 10579 PromOpHandles.emplace_back(PromOp); 10580 10581 // Replace all operations (these are all the same, but have a different 10582 // (promoted) return type). DAG.getNode will validate that the types of 10583 // a binary operator match, so go through the list in reverse so that 10584 // we've likely promoted both operands first. 10585 while (!PromOpHandles.empty()) { 10586 SDValue PromOp = PromOpHandles.back().getValue(); 10587 PromOpHandles.pop_back(); 10588 10589 unsigned C; 10590 switch (PromOp.getOpcode()) { 10591 default: C = 0; break; 10592 case ISD::SELECT: C = 1; break; 10593 case ISD::SELECT_CC: C = 2; break; 10594 } 10595 10596 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 10597 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 10598 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 10599 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 10600 // The to-be-promoted operands of this node have not yet been 10601 // promoted (this should be rare because we're going through the 10602 // list backward, but if one of the operands has several users in 10603 // this cluster of to-be-promoted nodes, it is possible). 10604 PromOpHandles.emplace_front(PromOp); 10605 continue; 10606 } 10607 10608 // For SELECT and SELECT_CC nodes, we do a similar check for any 10609 // to-be-promoted comparison inputs. 10610 if (PromOp.getOpcode() == ISD::SELECT || 10611 PromOp.getOpcode() == ISD::SELECT_CC) { 10612 if ((SelectTruncOp[0].count(PromOp.getNode()) && 10613 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 10614 (SelectTruncOp[1].count(PromOp.getNode()) && 10615 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 10616 PromOpHandles.emplace_front(PromOp); 10617 continue; 10618 } 10619 } 10620 10621 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 10622 PromOp.getNode()->op_end()); 10623 10624 // If this node has constant inputs, then they'll need to be promoted here. 10625 for (unsigned i = 0; i < 2; ++i) { 10626 if (!isa<ConstantSDNode>(Ops[C+i])) 10627 continue; 10628 if (Ops[C+i].getValueType() == N->getValueType(0)) 10629 continue; 10630 10631 if (N->getOpcode() == ISD::SIGN_EXTEND) 10632 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10633 else if (N->getOpcode() == ISD::ZERO_EXTEND) 10634 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10635 else 10636 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 10637 } 10638 10639 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 10640 // truncate them again to the original value type. 
10641 if (PromOp.getOpcode() == ISD::SELECT || 10642 PromOp.getOpcode() == ISD::SELECT_CC) { 10643 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 10644 if (SI0 != SelectTruncOp[0].end()) 10645 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 10646 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 10647 if (SI1 != SelectTruncOp[1].end()) 10648 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 10649 } 10650 10651 DAG.ReplaceAllUsesOfValueWith(PromOp, 10652 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 10653 } 10654 10655 // Now we're left with the initial extension itself. 10656 if (!ReallyNeedsExt) 10657 return N->getOperand(0); 10658 10659 // To zero extend, just mask off everything except for the first bit (in the 10660 // i1 case). 10661 if (N->getOpcode() == ISD::ZERO_EXTEND) 10662 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), 10663 DAG.getConstant(APInt::getLowBitsSet( 10664 N->getValueSizeInBits(0), PromBits), 10665 dl, N->getValueType(0))); 10666 10667 assert(N->getOpcode() == ISD::SIGN_EXTEND && 10668 "Invalid extension type"); 10669 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); 10670 SDValue ShiftCst = 10671 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); 10672 return DAG.getNode( 10673 ISD::SRA, dl, N->getValueType(0), 10674 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), 10675 ShiftCst); 10676 } 10677 10678 /// \brief Reduces the number of fp-to-int conversion when building a vector. 10679 /// 10680 /// If this vector is built out of floating to integer conversions, 10681 /// transform it to a vector built out of floating point values followed by a 10682 /// single floating to integer conversion of the vector. 10683 /// Namely (build_vector (fptosi $A), (fptosi $B), ...) 10684 /// becomes (fptosi (build_vector ($A, $B, ...))) 10685 SDValue PPCTargetLowering:: 10686 combineElementTruncationToVectorTruncation(SDNode *N, 10687 DAGCombinerInfo &DCI) const { 10688 assert(N->getOpcode() == ISD::BUILD_VECTOR && 10689 "Should be called with a BUILD_VECTOR node"); 10690 10691 SelectionDAG &DAG = DCI.DAG; 10692 SDLoc dl(N); 10693 10694 SDValue FirstInput = N->getOperand(0); 10695 assert(FirstInput.getOpcode() == PPCISD::MFVSR && 10696 "The input operand must be an fp-to-int conversion."); 10697 10698 // This combine happens after legalization so the fp_to_[su]i nodes are 10699 // already converted to PPCSISD nodes. 10700 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode(); 10701 if (FirstConversion == PPCISD::FCTIDZ || 10702 FirstConversion == PPCISD::FCTIDUZ || 10703 FirstConversion == PPCISD::FCTIWZ || 10704 FirstConversion == PPCISD::FCTIWUZ) { 10705 bool IsSplat = true; 10706 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ || 10707 FirstConversion == PPCISD::FCTIWUZ; 10708 EVT SrcVT = FirstInput.getOperand(0).getValueType(); 10709 SmallVector<SDValue, 4> Ops; 10710 EVT TargetVT = N->getValueType(0); 10711 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { 10712 if (N->getOperand(i).getOpcode() != PPCISD::MFVSR) 10713 return SDValue(); 10714 unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode(); 10715 if (NextConversion != FirstConversion) 10716 return SDValue(); 10717 if (N->getOperand(i) != FirstInput) 10718 IsSplat = false; 10719 } 10720 10721 // If this is a splat, we leave it as-is since there will be only a single 10722 // fp-to-int conversion followed by a splat of the integer. 
This is better 10723 // for 32-bit and smaller ints and neutral for 64-bit ints. 10724 if (IsSplat) 10725 return SDValue(); 10726 10727 // Now that we know we have the right type of node, get its operands 10728 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { 10729 SDValue In = N->getOperand(i).getOperand(0); 10730 // For 32-bit values, we need to add an FP_ROUND node. 10731 if (Is32Bit) { 10732 if (In.isUndef()) 10733 Ops.push_back(DAG.getUNDEF(SrcVT)); 10734 else { 10735 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl, 10736 MVT::f32, In.getOperand(0), 10737 DAG.getIntPtrConstant(1, dl)); 10738 Ops.push_back(Trunc); 10739 } 10740 } else 10741 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0)); 10742 } 10743 10744 unsigned Opcode; 10745 if (FirstConversion == PPCISD::FCTIDZ || 10746 FirstConversion == PPCISD::FCTIWZ) 10747 Opcode = ISD::FP_TO_SINT; 10748 else 10749 Opcode = ISD::FP_TO_UINT; 10750 10751 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32; 10752 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops); 10753 return DAG.getNode(Opcode, dl, TargetVT, BV); 10754 } 10755 return SDValue(); 10756 } 10757 10758 /// \brief Reduce the number of loads when building a vector. 10759 /// 10760 /// Building a vector out of multiple loads can be converted to a load 10761 /// of the vector type if the loads are consecutive. If the loads are 10762 /// consecutive but in descending order, a shuffle is added at the end 10763 /// to reorder the vector. 10764 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { 10765 assert(N->getOpcode() == ISD::BUILD_VECTOR && 10766 "Should be called with a BUILD_VECTOR node"); 10767 10768 SDLoc dl(N); 10769 bool InputsAreConsecutiveLoads = true; 10770 bool InputsAreReverseConsecutive = true; 10771 unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8; 10772 SDValue FirstInput = N->getOperand(0); 10773 bool IsRoundOfExtLoad = false; 10774 10775 if (FirstInput.getOpcode() == ISD::FP_ROUND && 10776 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) { 10777 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0)); 10778 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD; 10779 } 10780 // Not a build vector of (possibly fp_rounded) loads. 10781 if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) 10782 return SDValue(); 10783 10784 for (int i = 1, e = N->getNumOperands(); i < e; ++i) { 10785 // If any inputs are fp_round(extload), they all must be. 10786 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND) 10787 return SDValue(); 10788 10789 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) : 10790 N->getOperand(i); 10791 if (NextInput.getOpcode() != ISD::LOAD) 10792 return SDValue(); 10793 10794 SDValue PreviousInput = 10795 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1); 10796 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput); 10797 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput); 10798 10799 // If any inputs are fp_round(extload), they all must be. 10800 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD) 10801 return SDValue(); 10802 10803 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG)) 10804 InputsAreConsecutiveLoads = false; 10805 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG)) 10806 InputsAreReverseConsecutive = false; 10807 10808 // Exit early if the loads are neither consecutive nor reverse consecutive. 
10809 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive) 10810 return SDValue(); 10811 } 10812 10813 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && 10814 "The loads cannot be both consecutive and reverse consecutive."); 10815 10816 SDValue FirstLoadOp = 10817 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput; 10818 SDValue LastLoadOp = 10819 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) : 10820 N->getOperand(N->getNumOperands()-1); 10821 10822 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp); 10823 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp); 10824 if (InputsAreConsecutiveLoads) { 10825 assert(LD1 && "Input needs to be a LoadSDNode."); 10826 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(), 10827 LD1->getBasePtr(), LD1->getPointerInfo(), 10828 LD1->getAlignment()); 10829 } 10830 if (InputsAreReverseConsecutive) { 10831 assert(LDL && "Input needs to be a LoadSDNode."); 10832 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), 10833 LDL->getBasePtr(), LDL->getPointerInfo(), 10834 LDL->getAlignment()); 10835 SmallVector<int, 16> Ops; 10836 for (int i = N->getNumOperands() - 1; i >= 0; i--) 10837 Ops.push_back(i); 10838 10839 return DAG.getVectorShuffle(N->getValueType(0), dl, Load, 10840 DAG.getUNDEF(N->getValueType(0)), Ops); 10841 } 10842 return SDValue(); 10843 } 10844 10845 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, 10846 DAGCombinerInfo &DCI) const { 10847 assert(N->getOpcode() == ISD::BUILD_VECTOR && 10848 "Should be called with a BUILD_VECTOR node"); 10849 10850 SelectionDAG &DAG = DCI.DAG; 10851 SDLoc dl(N); 10852 10853 if (!Subtarget.hasVSX()) 10854 return SDValue(); 10855 10856 // The target independent DAG combiner will leave a build_vector of 10857 // float-to-int conversions intact. We can generate MUCH better code for 10858 // a float-to-int conversion of a vector of floats. 10859 SDValue FirstInput = N->getOperand(0); 10860 if (FirstInput.getOpcode() == PPCISD::MFVSR) { 10861 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI); 10862 if (Reduced) 10863 return Reduced; 10864 } 10865 10866 // If we're building a vector out of consecutive loads, just load that 10867 // vector type. 
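// For example, (build_vector (load a), (load a+4), (load a+8), (load a+12))
// can become a single v4i32/v4f32 load from a; if the loads are consecutive
// but in descending address order, the wide load is followed by a
// vector_shuffle that reverses the element order.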
10868 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG); 10869 if (Reduced) 10870 return Reduced; 10871 10872 if (N->getValueType(0) != MVT::v2f64) 10873 return SDValue(); 10874 10875 // Looking for: 10876 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) 10877 if (FirstInput.getOpcode() != ISD::SINT_TO_FP && 10878 FirstInput.getOpcode() != ISD::UINT_TO_FP) 10879 return SDValue(); 10880 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && 10881 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) 10882 return SDValue(); 10883 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode()) 10884 return SDValue(); 10885 10886 SDValue Ext1 = FirstInput.getOperand(0); 10887 SDValue Ext2 = N->getOperand(1).getOperand(0); 10888 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 10889 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 10890 return SDValue(); 10891 10892 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1)); 10893 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); 10894 if (!Ext1Op || !Ext2Op) 10895 return SDValue(); 10896 if (Ext1.getValueType() != MVT::i32 || 10897 Ext2.getValueType() != MVT::i32) 10898 if (Ext1.getOperand(0) != Ext2.getOperand(0)) 10899 return SDValue(); 10900 10901 int FirstElem = Ext1Op->getZExtValue(); 10902 int SecondElem = Ext2Op->getZExtValue(); 10903 int SubvecIdx; 10904 if (FirstElem == 0 && SecondElem == 1) 10905 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; 10906 else if (FirstElem == 2 && SecondElem == 3) 10907 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; 10908 else 10909 return SDValue(); 10910 10911 SDValue SrcVec = Ext1.getOperand(0); 10912 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? 10913 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; 10914 return DAG.getNode(NodeType, dl, MVT::v2f64, 10915 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); 10916 } 10917 10918 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, 10919 DAGCombinerInfo &DCI) const { 10920 assert((N->getOpcode() == ISD::SINT_TO_FP || 10921 N->getOpcode() == ISD::UINT_TO_FP) && 10922 "Need an int -> FP conversion node here"); 10923 10924 if (useSoftFloat() || !Subtarget.has64BitSupport()) 10925 return SDValue(); 10926 10927 SelectionDAG &DAG = DCI.DAG; 10928 SDLoc dl(N); 10929 SDValue Op(N, 0); 10930 10931 SDValue FirstOperand(Op.getOperand(0)); 10932 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && 10933 (FirstOperand.getValueType() == MVT::i8 || 10934 FirstOperand.getValueType() == MVT::i16); 10935 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) { 10936 bool Signed = N->getOpcode() == ISD::SINT_TO_FP; 10937 bool DstDouble = Op.getValueType() == MVT::f64; 10938 unsigned ConvOp = Signed ? 10939 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) : 10940 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS); 10941 SDValue WidthConst = 10942 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2, 10943 dl, false); 10944 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode()); 10945 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; 10946 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, 10947 DAG.getVTList(MVT::f64, MVT::Other), 10948 Ops, MVT::i8, LDN->getMemOperand()); 10949 10950 // For signed conversion, we need to sign-extend the value in the VSR 10951 if (Signed) { 10952 SDValue ExtOps[] = { Ld, WidthConst }; 10953 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps); 10954 return DAG.getNode(ConvOp, dl, DstDouble ? 
MVT::f64 : MVT::f32, Ext); 10955 } else 10956 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld); 10957 } 10958 10959 // Don't handle ppc_fp128 here or i1 conversions. 10960 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 10961 return SDValue(); 10962 if (Op.getOperand(0).getValueType() == MVT::i1) 10963 return SDValue(); 10964 10965 // For i32 intermediate values, unfortunately, the conversion functions 10966 // leave the upper 32 bits of the value are undefined. Within the set of 10967 // scalar instructions, we have no method for zero- or sign-extending the 10968 // value. Thus, we cannot handle i32 intermediate values here. 10969 if (Op.getOperand(0).getValueType() == MVT::i32) 10970 return SDValue(); 10971 10972 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 10973 "UINT_TO_FP is supported only with FPCVT"); 10974 10975 // If we have FCFIDS, then use it when converting to single-precision. 10976 // Otherwise, convert to double-precision and then round. 10977 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10978 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS 10979 : PPCISD::FCFIDS) 10980 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU 10981 : PPCISD::FCFID); 10982 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) 10983 ? MVT::f32 10984 : MVT::f64; 10985 10986 // If we're converting from a float, to an int, and back to a float again, 10987 // then we don't need the store/load pair at all. 10988 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && 10989 Subtarget.hasFPCVT()) || 10990 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { 10991 SDValue Src = Op.getOperand(0).getOperand(0); 10992 if (Src.getValueType() == MVT::f32) { 10993 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 10994 DCI.AddToWorklist(Src.getNode()); 10995 } else if (Src.getValueType() != MVT::f64) { 10996 // Make sure that we don't pick up a ppc_fp128 source value. 10997 return SDValue(); 10998 } 10999 11000 unsigned FCTOp = 11001 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 11002 PPCISD::FCTIDUZ; 11003 11004 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); 11005 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); 11006 11007 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { 11008 FP = DAG.getNode(ISD::FP_ROUND, dl, 11009 MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); 11010 DCI.AddToWorklist(FP.getNode()); 11011 } 11012 11013 return FP; 11014 } 11015 11016 return SDValue(); 11017 } 11018 11019 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for 11020 // builtins) into loads with swaps. 11021 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, 11022 DAGCombinerInfo &DCI) const { 11023 SelectionDAG &DAG = DCI.DAG; 11024 SDLoc dl(N); 11025 SDValue Chain; 11026 SDValue Base; 11027 MachineMemOperand *MMO; 11028 11029 switch (N->getOpcode()) { 11030 default: 11031 llvm_unreachable("Unexpected opcode for little endian VSX load"); 11032 case ISD::LOAD: { 11033 LoadSDNode *LD = cast<LoadSDNode>(N); 11034 Chain = LD->getChain(); 11035 Base = LD->getBasePtr(); 11036 MMO = LD->getMemOperand(); 11037 // If the MMO suggests this isn't a load of a full vector, leave 11038 // things alone. For a built-in, we have to make the change for 11039 // correctness, so if there is a size problem that will be a bug. 
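// (16 bytes is the size of a full 128-bit VSX register.)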
11040 if (MMO->getSize() < 16) 11041 return SDValue(); 11042 break; 11043 } 11044 case ISD::INTRINSIC_W_CHAIN: { 11045 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 11046 Chain = Intrin->getChain(); 11047 // Similarly to the store case below, Intrin->getBasePtr() doesn't get 11048 // us what we want. Get operand 2 instead. 11049 Base = Intrin->getOperand(2); 11050 MMO = Intrin->getMemOperand(); 11051 break; 11052 } 11053 } 11054 11055 MVT VecTy = N->getValueType(0).getSimpleVT(); 11056 SDValue LoadOps[] = { Chain, Base }; 11057 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 11058 DAG.getVTList(MVT::v2f64, MVT::Other), 11059 LoadOps, MVT::v2f64, MMO); 11060 11061 DCI.AddToWorklist(Load.getNode()); 11062 Chain = Load.getValue(1); 11063 SDValue Swap = DAG.getNode( 11064 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); 11065 DCI.AddToWorklist(Swap.getNode()); 11066 11067 // Add a bitcast if the resulting load type doesn't match v2f64. 11068 if (VecTy != MVT::v2f64) { 11069 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); 11070 DCI.AddToWorklist(N.getNode()); 11071 // Package {bitcast value, swap's chain} to match Load's shape. 11072 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), 11073 N, Swap.getValue(1)); 11074 } 11075 11076 return Swap; 11077 } 11078 11079 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 11080 // builtins) into stores with swaps. 11081 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 11082 DAGCombinerInfo &DCI) const { 11083 SelectionDAG &DAG = DCI.DAG; 11084 SDLoc dl(N); 11085 SDValue Chain; 11086 SDValue Base; 11087 unsigned SrcOpnd; 11088 MachineMemOperand *MMO; 11089 11090 switch (N->getOpcode()) { 11091 default: 11092 llvm_unreachable("Unexpected opcode for little endian VSX store"); 11093 case ISD::STORE: { 11094 StoreSDNode *ST = cast<StoreSDNode>(N); 11095 Chain = ST->getChain(); 11096 Base = ST->getBasePtr(); 11097 MMO = ST->getMemOperand(); 11098 SrcOpnd = 1; 11099 // If the MMO suggests this isn't a store of a full vector, leave 11100 // things alone. For a built-in, we have to make the change for 11101 // correctness, so if there is a size problem that will be a bug. 11102 if (MMO->getSize() < 16) 11103 return SDValue(); 11104 break; 11105 } 11106 case ISD::INTRINSIC_VOID: { 11107 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 11108 Chain = Intrin->getChain(); 11109 // Intrin->getBasePtr() oddly does not get what we want. 11110 Base = Intrin->getOperand(3); 11111 MMO = Intrin->getMemOperand(); 11112 SrcOpnd = 2; 11113 break; 11114 } 11115 } 11116 11117 SDValue Src = N->getOperand(SrcOpnd); 11118 MVT VecTy = Src.getValueType().getSimpleVT(); 11119 11120 // All stores are done as v2f64 and possible bit cast. 
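// For example, a v4i32 store on a little-endian subtarget becomes
// (stxvd2x (xxswapd (bitcast v2f64 value))); the swap compensates for the
// doubleword ordering used by lxvd2x/stxvd2x.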
11121 if (VecTy != MVT::v2f64) { 11122 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); 11123 DCI.AddToWorklist(Src.getNode()); 11124 } 11125 11126 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 11127 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); 11128 DCI.AddToWorklist(Swap.getNode()); 11129 Chain = Swap.getValue(1); 11130 SDValue StoreOps[] = { Chain, Swap, Base }; 11131 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 11132 DAG.getVTList(MVT::Other), 11133 StoreOps, VecTy, MMO); 11134 DCI.AddToWorklist(Store.getNode()); 11135 return Store; 11136 } 11137 11138 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 11139 DAGCombinerInfo &DCI) const { 11140 SelectionDAG &DAG = DCI.DAG; 11141 SDLoc dl(N); 11142 switch (N->getOpcode()) { 11143 default: break; 11144 case PPCISD::SHL: 11145 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. 11146 return N->getOperand(0); 11147 break; 11148 case PPCISD::SRL: 11149 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. 11150 return N->getOperand(0); 11151 break; 11152 case PPCISD::SRA: 11153 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 11154 if (C->isNullValue() || // 0 >>s V -> 0. 11155 C->isAllOnesValue()) // -1 >>s V -> -1. 11156 return N->getOperand(0); 11157 } 11158 break; 11159 case ISD::SIGN_EXTEND: 11160 case ISD::ZERO_EXTEND: 11161 case ISD::ANY_EXTEND: 11162 return DAGCombineExtBoolTrunc(N, DCI); 11163 case ISD::TRUNCATE: 11164 case ISD::SETCC: 11165 case ISD::SELECT_CC: 11166 return DAGCombineTruncBoolExt(N, DCI); 11167 case ISD::SINT_TO_FP: 11168 case ISD::UINT_TO_FP: 11169 return combineFPToIntToFP(N, DCI); 11170 case ISD::STORE: { 11171 EVT Op1VT = N->getOperand(1).getValueType(); 11172 bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) || 11173 (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16)); 11174 11175 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 11176 if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() && 11177 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 11178 ValidTypeForStoreFltAsInt && 11179 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 11180 SDValue Val = N->getOperand(1).getOperand(0); 11181 if (Val.getValueType() == MVT::f32) { 11182 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 11183 DCI.AddToWorklist(Val.getNode()); 11184 } 11185 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 11186 DCI.AddToWorklist(Val.getNode()); 11187 11188 if (Op1VT == MVT::i32) { 11189 SDValue Ops[] = { 11190 N->getOperand(0), Val, N->getOperand(2), 11191 DAG.getValueType(N->getOperand(1).getValueType()) 11192 }; 11193 11194 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 11195 DAG.getVTList(MVT::Other), Ops, 11196 cast<StoreSDNode>(N)->getMemoryVT(), 11197 cast<StoreSDNode>(N)->getMemOperand()); 11198 } else { 11199 unsigned WidthInBytes = 11200 N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2; 11201 SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false); 11202 11203 SDValue Ops[] = { 11204 N->getOperand(0), Val, N->getOperand(2), WidthConst, 11205 DAG.getValueType(N->getOperand(1).getValueType()) 11206 }; 11207 Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl, 11208 DAG.getVTList(MVT::Other), Ops, 11209 cast<StoreSDNode>(N)->getMemoryVT(), 11210 cast<StoreSDNode>(N)->getMemOperand()); 11211 } 11212 11213 DCI.AddToWorklist(Val.getNode()); 11214 return Val; 11215 } 11216 11217 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 
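// For example, (store (bswap i32:x), ptr) becomes a single stwbrx of x to
// ptr; i16 values use sthbrx, and i64 values use stdbrx on subtargets with
// LDBRX/STDBRX support.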
11218 if (cast<StoreSDNode>(N)->isUnindexed() && 11219 N->getOperand(1).getOpcode() == ISD::BSWAP && 11220 N->getOperand(1).getNode()->hasOneUse() && 11221 (N->getOperand(1).getValueType() == MVT::i32 || 11222 N->getOperand(1).getValueType() == MVT::i16 || 11223 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 11224 N->getOperand(1).getValueType() == MVT::i64))) { 11225 SDValue BSwapOp = N->getOperand(1).getOperand(0); 11226 // Do an any-extend to 32-bits if this is a half-word input. 11227 if (BSwapOp.getValueType() == MVT::i16) 11228 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 11229 11230 SDValue Ops[] = { 11231 N->getOperand(0), BSwapOp, N->getOperand(2), 11232 DAG.getValueType(N->getOperand(1).getValueType()) 11233 }; 11234 return 11235 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 11236 Ops, cast<StoreSDNode>(N)->getMemoryVT(), 11237 cast<StoreSDNode>(N)->getMemOperand()); 11238 } 11239 11240 // For little endian, VSX stores require generating xxswapd/lxvd2x. 11241 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 11242 EVT VT = N->getOperand(1).getValueType(); 11243 if (VT.isSimple()) { 11244 MVT StoreVT = VT.getSimpleVT(); 11245 if (Subtarget.needsSwapsForVSXMemOps() && 11246 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || 11247 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) 11248 return expandVSXStoreForLE(N, DCI); 11249 } 11250 break; 11251 } 11252 case ISD::LOAD: { 11253 LoadSDNode *LD = cast<LoadSDNode>(N); 11254 EVT VT = LD->getValueType(0); 11255 11256 // For little endian, VSX loads require generating lxvd2x/xxswapd. 11257 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. 11258 if (VT.isSimple()) { 11259 MVT LoadVT = VT.getSimpleVT(); 11260 if (Subtarget.needsSwapsForVSXMemOps() && 11261 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || 11262 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) 11263 return expandVSXLoadForLE(N, DCI); 11264 } 11265 11266 // We sometimes end up with a 64-bit integer load, from which we extract 11267 // two single-precision floating-point numbers. This happens with 11268 // std::complex<float>, and other similar structures, because of the way we 11269 // canonicalize structure copies. However, if we lack direct moves, 11270 // then the final bitcasts from the extracted integer values to the 11271 // floating-point numbers turn into store/load pairs. Even with direct moves, 11272 // just loading the two floating-point numbers is likely better. 
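// The ReplaceTwoFloatLoad() lambda below matches that pattern and rewrites
// it as two f32 loads from the original base pointer (at offsets 0 and 4),
// swapping the two results on little-endian targets so that each bitcast
// receives the float it expects.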
11273 auto ReplaceTwoFloatLoad = [&]() { 11274 if (VT != MVT::i64) 11275 return false; 11276 11277 if (LD->getExtensionType() != ISD::NON_EXTLOAD || 11278 LD->isVolatile()) 11279 return false; 11280 11281 // We're looking for a sequence like this: 11282 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 11283 // t16: i64 = srl t13, Constant:i32<32> 11284 // t17: i32 = truncate t16 11285 // t18: f32 = bitcast t17 11286 // t19: i32 = truncate t13 11287 // t20: f32 = bitcast t19 11288 11289 if (!LD->hasNUsesOfValue(2, 0)) 11290 return false; 11291 11292 auto UI = LD->use_begin(); 11293 while (UI.getUse().getResNo() != 0) ++UI; 11294 SDNode *Trunc = *UI++; 11295 while (UI.getUse().getResNo() != 0) ++UI; 11296 SDNode *RightShift = *UI; 11297 if (Trunc->getOpcode() != ISD::TRUNCATE) 11298 std::swap(Trunc, RightShift); 11299 11300 if (Trunc->getOpcode() != ISD::TRUNCATE || 11301 Trunc->getValueType(0) != MVT::i32 || 11302 !Trunc->hasOneUse()) 11303 return false; 11304 if (RightShift->getOpcode() != ISD::SRL || 11305 !isa<ConstantSDNode>(RightShift->getOperand(1)) || 11306 RightShift->getConstantOperandVal(1) != 32 || 11307 !RightShift->hasOneUse()) 11308 return false; 11309 11310 SDNode *Trunc2 = *RightShift->use_begin(); 11311 if (Trunc2->getOpcode() != ISD::TRUNCATE || 11312 Trunc2->getValueType(0) != MVT::i32 || 11313 !Trunc2->hasOneUse()) 11314 return false; 11315 11316 SDNode *Bitcast = *Trunc->use_begin(); 11317 SDNode *Bitcast2 = *Trunc2->use_begin(); 11318 11319 if (Bitcast->getOpcode() != ISD::BITCAST || 11320 Bitcast->getValueType(0) != MVT::f32) 11321 return false; 11322 if (Bitcast2->getOpcode() != ISD::BITCAST || 11323 Bitcast2->getValueType(0) != MVT::f32) 11324 return false; 11325 11326 if (Subtarget.isLittleEndian()) 11327 std::swap(Bitcast, Bitcast2); 11328 11329 // Bitcast has the second float (in memory-layout order) and Bitcast2 11330 // has the first one. 11331 11332 SDValue BasePtr = LD->getBasePtr(); 11333 if (LD->isIndexed()) { 11334 assert(LD->getAddressingMode() == ISD::PRE_INC && 11335 "Non-pre-inc AM on PPC?"); 11336 BasePtr = 11337 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, 11338 LD->getOffset()); 11339 } 11340 11341 auto MMOFlags = 11342 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; 11343 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, 11344 LD->getPointerInfo(), LD->getAlignment(), 11345 MMOFlags, LD->getAAInfo()); 11346 SDValue AddPtr = 11347 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), 11348 BasePtr, DAG.getIntPtrConstant(4, dl)); 11349 SDValue FloatLoad2 = DAG.getLoad( 11350 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, 11351 LD->getPointerInfo().getWithOffset(4), 11352 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); 11353 11354 if (LD->isIndexed()) { 11355 // Note that DAGCombine should re-form any pre-increment load(s) from 11356 // what is produced here if that makes sense. 11357 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); 11358 } 11359 11360 DCI.CombineTo(Bitcast2, FloatLoad); 11361 DCI.CombineTo(Bitcast, FloatLoad2); 11362 11363 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 
2 : 1), 11364 SDValue(FloatLoad2.getNode(), 1)); 11365 return true; 11366 }; 11367 11368 if (ReplaceTwoFloatLoad()) 11369 return SDValue(N, 0); 11370 11371 EVT MemVT = LD->getMemoryVT(); 11372 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); 11373 unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); 11374 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); 11375 unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); 11376 if (LD->isUnindexed() && VT.isVector() && 11377 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && 11378 // P8 and later hardware should just use LOAD. 11379 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || 11380 VT == MVT::v4i32 || VT == MVT::v4f32)) || 11381 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && 11382 LD->getAlignment() >= ScalarABIAlignment)) && 11383 LD->getAlignment() < ABIAlignment) { 11384 // This is a type-legal unaligned Altivec or QPX load. 11385 SDValue Chain = LD->getChain(); 11386 SDValue Ptr = LD->getBasePtr(); 11387 bool isLittleEndian = Subtarget.isLittleEndian(); 11388 11389 // This implements the loading of unaligned vectors as described in 11390 // the venerable Apple Velocity Engine overview. Specifically: 11391 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 11392 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 11393 // 11394 // The general idea is to expand a sequence of one or more unaligned 11395 // loads into an alignment-based permutation-control instruction (lvsl 11396 // or lvsr), a series of regular vector loads (which always truncate 11397 // their input address to an aligned address), and a series of 11398 // permutations. The results of these permutations are the requested 11399 // loaded values. The trick is that the last "extra" load is not taken 11400 // from the address you might suspect (sizeof(vector) bytes after the 11401 // last requested load), but rather sizeof(vector) - 1 bytes after the 11402 // last requested vector. The point of this is to avoid a page fault if 11403 // the base address happened to be aligned. This works because if the 11404 // base address is aligned, then adding less than a full vector length 11405 // will cause the last vector in the sequence to be (re)loaded. 11406 // Otherwise, the next vector will be fetched as you might suspect was 11407 // necessary. 11408 11409 // We might be able to reuse the permutation generation from 11410 // a different base address offset from this one by an aligned amount. 11411 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 11412 // optimization later. 11413 Intrinsic::ID Intr, IntrLD, IntrPerm; 11414 MVT PermCntlTy, PermTy, LDTy; 11415 if (Subtarget.hasAltivec()) { 11416 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : 11417 Intrinsic::ppc_altivec_lvsl; 11418 IntrLD = Intrinsic::ppc_altivec_lvx; 11419 IntrPerm = Intrinsic::ppc_altivec_vperm; 11420 PermCntlTy = MVT::v16i8; 11421 PermTy = MVT::v4i32; 11422 LDTy = MVT::v4i32; 11423 } else { 11424 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : 11425 Intrinsic::ppc_qpx_qvlpcls; 11426 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : 11427 Intrinsic::ppc_qpx_qvlfs; 11428 IntrPerm = Intrinsic::ppc_qpx_qvfperm; 11429 PermCntlTy = MVT::v4f64; 11430 PermTy = MVT::v4f64; 11431 LDTy = MemVT.getSimpleVT(); 11432 } 11433 11434 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); 11435 11436 // Create the new MMO for the new base load. 
It is like the original MMO, 11437 // but represents an area in memory almost twice the vector size centered 11438 // on the original address. If the address is unaligned, we might start 11439 // reading up to (sizeof(vector)-1) bytes below the address of the 11440 // original unaligned load. 11441 MachineFunction &MF = DAG.getMachineFunction(); 11442 MachineMemOperand *BaseMMO = 11443 MF.getMachineMemOperand(LD->getMemOperand(), 11444 -(long)MemVT.getStoreSize()+1, 11445 2*MemVT.getStoreSize()-1); 11446 11447 // Create the new base load. 11448 SDValue LDXIntID = 11449 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); 11450 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; 11451 SDValue BaseLoad = 11452 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 11453 DAG.getVTList(PermTy, MVT::Other), 11454 BaseLoadOps, LDTy, BaseMMO); 11455 11456 // Note that the value of IncOffset (which is provided to the next 11457 // load's pointer info offset value, and thus used to calculate the 11458 // alignment), and the value of IncValue (which is actually used to 11459 // increment the pointer value) are different! This is because we 11460 // require the next load to appear to be aligned, even though it 11461 // is actually offset from the base pointer by a lesser amount. 11462 int IncOffset = VT.getSizeInBits() / 8; 11463 int IncValue = IncOffset; 11464 11465 // Walk (both up and down) the chain looking for another load at the real 11466 // (aligned) offset (the alignment of the other load does not matter in 11467 // this case). If found, then do not use the offset reduction trick, as 11468 // that will prevent the loads from being later combined (as they would 11469 // otherwise be duplicates). 11470 if (!findConsecutiveLoad(LD, DAG)) 11471 --IncValue; 11472 11473 SDValue Increment = 11474 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); 11475 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 11476 11477 MachineMemOperand *ExtraMMO = 11478 MF.getMachineMemOperand(LD->getMemOperand(), 11479 1, 2*MemVT.getStoreSize()-1); 11480 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; 11481 SDValue ExtraLoad = 11482 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, 11483 DAG.getVTList(PermTy, MVT::Other), 11484 ExtraLoadOps, LDTy, ExtraMMO); 11485 11486 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 11487 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 11488 11489 // Because vperm has a big-endian bias, we must reverse the order 11490 // of the input vectors and complement the permute control vector 11491 // when generating little endian code. We have already handled the 11492 // latter by using lvsr instead of lvsl, so just reverse BaseLoad 11493 // and ExtraLoad here. 11494 SDValue Perm; 11495 if (isLittleEndian) 11496 Perm = BuildIntrinsicOp(IntrPerm, 11497 ExtraLoad, BaseLoad, PermCntl, DAG, dl); 11498 else 11499 Perm = BuildIntrinsicOp(IntrPerm, 11500 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 11501 11502 if (VT != PermTy) 11503 Perm = Subtarget.hasAltivec() ? 11504 DAG.getNode(ISD::BITCAST, dl, VT, Perm) : 11505 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX 11506 DAG.getTargetConstant(1, dl, MVT::i64)); 11507 // second argument is 1 because this rounding 11508 // is always exact. 11509 11510 // The output of the permutation is our loaded result, the TokenFactor is 11511 // our new chain. 
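// To summarize the Altivec case: the expansion is roughly an lvsl/lvsr of
// the address, an lvx from the address, an lvx from address+15 (or
// address+16 when a consecutive load was found above), and a vperm of the
// two loaded vectors (with the inputs swapped for little endian, as noted
// above).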
11512 DCI.CombineTo(N, Perm, TF); 11513 return SDValue(N, 0); 11514 } 11515 } 11516 break; 11517 case ISD::INTRINSIC_WO_CHAIN: { 11518 bool isLittleEndian = Subtarget.isLittleEndian(); 11519 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 11520 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr 11521 : Intrinsic::ppc_altivec_lvsl); 11522 if ((IID == Intr || 11523 IID == Intrinsic::ppc_qpx_qvlpcld || 11524 IID == Intrinsic::ppc_qpx_qvlpcls) && 11525 N->getOperand(1)->getOpcode() == ISD::ADD) { 11526 SDValue Add = N->getOperand(1); 11527 11528 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 11529 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; 11530 11531 if (DAG.MaskedValueIsZero(Add->getOperand(1), 11532 APInt::getAllOnesValue(Bits /* alignment */) 11533 .zext(Add.getScalarValueSizeInBits()))) { 11534 SDNode *BasePtr = Add->getOperand(0).getNode(); 11535 for (SDNode::use_iterator UI = BasePtr->use_begin(), 11536 UE = BasePtr->use_end(); 11537 UI != UE; ++UI) { 11538 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11539 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) { 11540 // We've found another LVSL/LVSR, and this address is an aligned 11541 // multiple of that one. The results will be the same, so use the 11542 // one we've just found instead. 11543 11544 return SDValue(*UI, 0); 11545 } 11546 } 11547 } 11548 11549 if (isa<ConstantSDNode>(Add->getOperand(1))) { 11550 SDNode *BasePtr = Add->getOperand(0).getNode(); 11551 for (SDNode::use_iterator UI = BasePtr->use_begin(), 11552 UE = BasePtr->use_end(); UI != UE; ++UI) { 11553 if (UI->getOpcode() == ISD::ADD && 11554 isa<ConstantSDNode>(UI->getOperand(1)) && 11555 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - 11556 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) % 11557 (1ULL << Bits) == 0) { 11558 SDNode *OtherAdd = *UI; 11559 for (SDNode::use_iterator VI = OtherAdd->use_begin(), 11560 VE = OtherAdd->use_end(); VI != VE; ++VI) { 11561 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11562 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) { 11563 return SDValue(*VI, 0); 11564 } 11565 } 11566 } 11567 } 11568 } 11569 } 11570 } 11571 11572 break; 11573 case ISD::INTRINSIC_W_CHAIN: { 11574 // For little endian, VSX loads require generating lxvd2x/xxswapd. 11575 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. 11576 if (Subtarget.needsSwapsForVSXMemOps()) { 11577 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 11578 default: 11579 break; 11580 case Intrinsic::ppc_vsx_lxvw4x: 11581 case Intrinsic::ppc_vsx_lxvd2x: 11582 return expandVSXLoadForLE(N, DCI); 11583 } 11584 } 11585 break; 11586 } 11587 case ISD::INTRINSIC_VOID: { 11588 // For little endian, VSX stores require generating xxswapd/stxvd2x. 11589 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 11590 if (Subtarget.needsSwapsForVSXMemOps()) { 11591 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 11592 default: 11593 break; 11594 case Intrinsic::ppc_vsx_stxvw4x: 11595 case Intrinsic::ppc_vsx_stxvd2x: 11596 return expandVSXStoreForLE(N, DCI); 11597 } 11598 } 11599 break; 11600 } 11601 case ISD::BSWAP: 11602 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 
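// For example, (bswap (i32 (load ptr))) becomes a single lwbrx from ptr;
// i16 values use lhbrx, and i64 values use ldbrx on subtargets with LDBRX
// support.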
11603 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 11604 N->getOperand(0).hasOneUse() && 11605 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 11606 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && 11607 N->getValueType(0) == MVT::i64))) { 11608 SDValue Load = N->getOperand(0); 11609 LoadSDNode *LD = cast<LoadSDNode>(Load); 11610 // Create the byte-swapping load. 11611 SDValue Ops[] = { 11612 LD->getChain(), // Chain 11613 LD->getBasePtr(), // Ptr 11614 DAG.getValueType(N->getValueType(0)) // VT 11615 }; 11616 SDValue BSLoad = 11617 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 11618 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 11619 MVT::i64 : MVT::i32, MVT::Other), 11620 Ops, LD->getMemoryVT(), LD->getMemOperand()); 11621 11622 // If this is an i16 load, insert the truncate. 11623 SDValue ResVal = BSLoad; 11624 if (N->getValueType(0) == MVT::i16) 11625 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 11626 11627 // First, combine the bswap away. This makes the value produced by the 11628 // load dead. 11629 DCI.CombineTo(N, ResVal); 11630 11631 // Next, combine the load away, we give it a bogus result value but a real 11632 // chain result. The result value is dead because the bswap is dead. 11633 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 11634 11635 // Return N so it doesn't get rechecked! 11636 return SDValue(N, 0); 11637 } 11638 11639 break; 11640 case PPCISD::VCMP: { 11641 // If a VCMPo node already exists with exactly the same operands as this 11642 // node, use its result instead of this node (VCMPo computes both a CR6 and 11643 // a normal output). 11644 // 11645 if (!N->getOperand(0).hasOneUse() && 11646 !N->getOperand(1).hasOneUse() && 11647 !N->getOperand(2).hasOneUse()) { 11648 11649 // Scan all of the users of the LHS, looking for VCMPo's that match. 11650 SDNode *VCMPoNode = nullptr; 11651 11652 SDNode *LHSN = N->getOperand(0).getNode(); 11653 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 11654 UI != E; ++UI) 11655 if (UI->getOpcode() == PPCISD::VCMPo && 11656 UI->getOperand(1) == N->getOperand(1) && 11657 UI->getOperand(2) == N->getOperand(2) && 11658 UI->getOperand(0) == N->getOperand(0)) { 11659 VCMPoNode = *UI; 11660 break; 11661 } 11662 11663 // If there is no VCMPo node, or if the flag value has a single use, don't 11664 // transform this. 11665 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 11666 break; 11667 11668 // Look at the (necessarily single) use of the flag value. If it has a 11669 // chain, this transformation is more complex. Note that multiple things 11670 // could use the value result, which we should ignore. 11671 SDNode *FlagUser = nullptr; 11672 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 11673 FlagUser == nullptr; ++UI) { 11674 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 11675 SDNode *User = *UI; 11676 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 11677 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 11678 FlagUser = User; 11679 break; 11680 } 11681 } 11682 } 11683 11684 // If the user is a MFOCRF instruction, we know this is safe. 11685 // Otherwise we give up for right now. 
11686 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 11687 return SDValue(VCMPoNode, 0); 11688 } 11689 break; 11690 } 11691 case ISD::BRCOND: { 11692 SDValue Cond = N->getOperand(1); 11693 SDValue Target = N->getOperand(2); 11694 11695 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 11696 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 11697 Intrinsic::ppc_is_decremented_ctr_nonzero) { 11698 11699 // We now need to make the intrinsic dead (it cannot be instruction 11700 // selected). 11701 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 11702 assert(Cond.getNode()->hasOneUse() && 11703 "Counter decrement has more than one use"); 11704 11705 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 11706 N->getOperand(0), Target); 11707 } 11708 } 11709 break; 11710 case ISD::BR_CC: { 11711 // If this is a branch on an altivec predicate comparison, lower this so 11712 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 11713 // lowering is done pre-legalize, because the legalizer lowers the predicate 11714 // compare down to code that is difficult to reassemble. 11715 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 11716 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 11717 11718 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 11719 // value. If so, pass-through the AND to get to the intrinsic. 11720 if (LHS.getOpcode() == ISD::AND && 11721 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 11722 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 11723 Intrinsic::ppc_is_decremented_ctr_nonzero && 11724 isa<ConstantSDNode>(LHS.getOperand(1)) && 11725 !isNullConstant(LHS.getOperand(1))) 11726 LHS = LHS.getOperand(0); 11727 11728 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 11729 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 11730 Intrinsic::ppc_is_decremented_ctr_nonzero && 11731 isa<ConstantSDNode>(RHS)) { 11732 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 11733 "Counter decrement comparison is not EQ or NE"); 11734 11735 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 11736 bool isBDNZ = (CC == ISD::SETEQ && Val) || 11737 (CC == ISD::SETNE && !Val); 11738 11739 // We now need to make the intrinsic dead (it cannot be instruction 11740 // selected). 11741 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 11742 assert(LHS.getNode()->hasOneUse() && 11743 "Counter decrement has more than one use"); 11744 11745 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 11746 N->getOperand(0), N->getOperand(4)); 11747 } 11748 11749 int CompareOpc; 11750 bool isDot; 11751 11752 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 11753 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 11754 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { 11755 assert(isDot && "Can't compare against a vector result!"); 11756 11757 // If this is a comparison against something other than 0/1, then we know 11758 // that the condition is never/always true. 11759 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 11760 if (Val != 0 && Val != 1) { 11761 if (CC == ISD::SETEQ) // Cond never true, remove branch. 11762 return N->getOperand(0); 11763 // Always !=, turn it into an unconditional branch. 
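// (The *_p predicate intrinsics only ever produce 0 or 1, so a comparison
// against any other constant is decided here at compile time.)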
11764 return DAG.getNode(ISD::BR, dl, MVT::Other, 11765 N->getOperand(0), N->getOperand(4)); 11766 } 11767 11768 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 11769 11770 // Create the PPCISD altivec 'dot' comparison node. 11771 SDValue Ops[] = { 11772 LHS.getOperand(2), // LHS of compare 11773 LHS.getOperand(3), // RHS of compare 11774 DAG.getConstant(CompareOpc, dl, MVT::i32) 11775 }; 11776 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 11777 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 11778 11779 // Unpack the result based on how the target uses it. 11780 PPC::Predicate CompOpc; 11781 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 11782 default: // Can't happen, don't crash on invalid number though. 11783 case 0: // Branch on the value of the EQ bit of CR6. 11784 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 11785 break; 11786 case 1: // Branch on the inverted value of the EQ bit of CR6. 11787 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 11788 break; 11789 case 2: // Branch on the value of the LT bit of CR6. 11790 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 11791 break; 11792 case 3: // Branch on the inverted value of the LT bit of CR6. 11793 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; 11794 break; 11795 } 11796 11797 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 11798 DAG.getConstant(CompOpc, dl, MVT::i32), 11799 DAG.getRegister(PPC::CR6, MVT::i32), 11800 N->getOperand(4), CompNode.getValue(1)); 11801 } 11802 break; 11803 } 11804 case ISD::BUILD_VECTOR: 11805 return DAGCombineBuildVector(N, DCI); 11806 } 11807 11808 return SDValue(); 11809 } 11810 11811 SDValue 11812 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 11813 SelectionDAG &DAG, 11814 std::vector<SDNode *> *Created) const { 11815 // fold (sdiv X, pow2) 11816 EVT VT = N->getValueType(0); 11817 if (VT == MVT::i64 && !Subtarget.isPPC64()) 11818 return SDValue(); 11819 if ((VT != MVT::i32 && VT != MVT::i64) || 11820 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) 11821 return SDValue(); 11822 11823 SDLoc DL(N); 11824 SDValue N0 = N->getOperand(0); 11825 11826 bool IsNegPow2 = (-Divisor).isPowerOf2(); 11827 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); 11828 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); 11829 11830 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); 11831 if (Created) 11832 Created->push_back(Op.getNode()); 11833 11834 if (IsNegPow2) { 11835 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); 11836 if (Created) 11837 Created->push_back(Op.getNode()); 11838 } 11839 11840 return Op; 11841 } 11842 11843 //===----------------------------------------------------------------------===// 11844 // Inline Assembly Support 11845 //===----------------------------------------------------------------------===// 11846 11847 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 11848 APInt &KnownZero, 11849 APInt &KnownOne, 11850 const SelectionDAG &DAG, 11851 unsigned Depth) const { 11852 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 11853 switch (Op.getOpcode()) { 11854 default: break; 11855 case PPCISD::LBRX: { 11856 // lhbrx is known to have the top bits cleared out. 
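// (Operand 2 of the LBRX node is the VT node recording the memory type; a
// 16-bit byte-reversed load zero-fills the upper 16 bits of its i32 result,
// hence the mask below.)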
11857 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 11858 KnownZero = 0xFFFF0000; 11859 break; 11860 } 11861 case ISD::INTRINSIC_WO_CHAIN: { 11862 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 11863 default: break; 11864 case Intrinsic::ppc_altivec_vcmpbfp_p: 11865 case Intrinsic::ppc_altivec_vcmpeqfp_p: 11866 case Intrinsic::ppc_altivec_vcmpequb_p: 11867 case Intrinsic::ppc_altivec_vcmpequh_p: 11868 case Intrinsic::ppc_altivec_vcmpequw_p: 11869 case Intrinsic::ppc_altivec_vcmpequd_p: 11870 case Intrinsic::ppc_altivec_vcmpgefp_p: 11871 case Intrinsic::ppc_altivec_vcmpgtfp_p: 11872 case Intrinsic::ppc_altivec_vcmpgtsb_p: 11873 case Intrinsic::ppc_altivec_vcmpgtsh_p: 11874 case Intrinsic::ppc_altivec_vcmpgtsw_p: 11875 case Intrinsic::ppc_altivec_vcmpgtsd_p: 11876 case Intrinsic::ppc_altivec_vcmpgtub_p: 11877 case Intrinsic::ppc_altivec_vcmpgtuh_p: 11878 case Intrinsic::ppc_altivec_vcmpgtuw_p: 11879 case Intrinsic::ppc_altivec_vcmpgtud_p: 11880 KnownZero = ~1U; // All bits but the low one are known to be zero. 11881 break; 11882 } 11883 } 11884 } 11885 } 11886 11887 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { 11888 switch (Subtarget.getDarwinDirective()) { 11889 default: break; 11890 case PPC::DIR_970: 11891 case PPC::DIR_PWR4: 11892 case PPC::DIR_PWR5: 11893 case PPC::DIR_PWR5X: 11894 case PPC::DIR_PWR6: 11895 case PPC::DIR_PWR6X: 11896 case PPC::DIR_PWR7: 11897 case PPC::DIR_PWR8: 11898 case PPC::DIR_PWR9: { 11899 if (!ML) 11900 break; 11901 11902 const PPCInstrInfo *TII = Subtarget.getInstrInfo(); 11903 11904 // For small loops (between 5 and 8 instructions), align to a 32-byte 11905 // boundary so that the entire loop fits in one instruction-cache line. 11906 uint64_t LoopSize = 0; 11907 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) 11908 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { 11909 LoopSize += TII->getInstSizeInBytes(*J); 11910 if (LoopSize > 32) 11911 break; 11912 } 11913 11914 if (LoopSize > 16 && LoopSize <= 32) 11915 return 5; 11916 11917 break; 11918 } 11919 } 11920 11921 return TargetLowering::getPrefLoopAlignment(ML); 11922 } 11923 11924 /// getConstraintType - Given a constraint, return the type of 11925 /// constraint it is for this target. 11926 PPCTargetLowering::ConstraintType 11927 PPCTargetLowering::getConstraintType(StringRef Constraint) const { 11928 if (Constraint.size() == 1) { 11929 switch (Constraint[0]) { 11930 default: break; 11931 case 'b': 11932 case 'r': 11933 case 'f': 11934 case 'd': 11935 case 'v': 11936 case 'y': 11937 return C_RegisterClass; 11938 case 'Z': 11939 // FIXME: While Z does indicate a memory constraint, it specifically 11940 // indicates an r+r address (used in conjunction with the 'y' modifier 11941 // in the replacement string). Currently, we're forcing the base 11942 // register to be r0 in the asm printer (which is interpreted as zero) 11943 // and forming the complete address in the second register. This is 11944 // suboptimal. 11945 return C_Memory; 11946 } 11947 } else if (Constraint == "wc") { // individual CR bits. 11948 return C_RegisterClass; 11949 } else if (Constraint == "wa" || Constraint == "wd" || 11950 Constraint == "wf" || Constraint == "ws") { 11951 return C_RegisterClass; // VSX registers. 11952 } 11953 return TargetLowering::getConstraintType(Constraint); 11954 } 11955 11956 /// Examine constraint type and operand type and determine a weight value. 
11957 /// This object must already have been set up with the operand type 11958 /// and the current alternative constraint selected. 11959 TargetLowering::ConstraintWeight 11960 PPCTargetLowering::getSingleConstraintMatchWeight( 11961 AsmOperandInfo &info, const char *constraint) const { 11962 ConstraintWeight weight = CW_Invalid; 11963 Value *CallOperandVal = info.CallOperandVal; 11964 // If we don't have a value, we can't do a match, 11965 // but allow it at the lowest weight. 11966 if (!CallOperandVal) 11967 return CW_Default; 11968 Type *type = CallOperandVal->getType(); 11969 11970 // Look at the constraint type. 11971 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 11972 return CW_Register; // an individual CR bit. 11973 else if ((StringRef(constraint) == "wa" || 11974 StringRef(constraint) == "wd" || 11975 StringRef(constraint) == "wf") && 11976 type->isVectorTy()) 11977 return CW_Register; 11978 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 11979 return CW_Register; 11980 11981 switch (*constraint) { 11982 default: 11983 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 11984 break; 11985 case 'b': 11986 if (type->isIntegerTy()) 11987 weight = CW_Register; 11988 break; 11989 case 'f': 11990 if (type->isFloatTy()) 11991 weight = CW_Register; 11992 break; 11993 case 'd': 11994 if (type->isDoubleTy()) 11995 weight = CW_Register; 11996 break; 11997 case 'v': 11998 if (type->isVectorTy()) 11999 weight = CW_Register; 12000 break; 12001 case 'y': 12002 weight = CW_Register; 12003 break; 12004 case 'Z': 12005 weight = CW_Memory; 12006 break; 12007 } 12008 return weight; 12009 } 12010 12011 std::pair<unsigned, const TargetRegisterClass *> 12012 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 12013 StringRef Constraint, 12014 MVT VT) const { 12015 if (Constraint.size() == 1) { 12016 // GCC RS6000 Constraint Letters 12017 switch (Constraint[0]) { 12018 case 'b': // R1-R31 12019 if (VT == MVT::i64 && Subtarget.isPPC64()) 12020 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 12021 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 12022 case 'r': // R0-R31 12023 if (VT == MVT::i64 && Subtarget.isPPC64()) 12024 return std::make_pair(0U, &PPC::G8RCRegClass); 12025 return std::make_pair(0U, &PPC::GPRCRegClass); 12026 // 'd' and 'f' constraints are both defined to be "the floating point 12027 // registers", where one is for 32-bit and the other for 64-bit. We don't 12028 // really care overly much here so just give them all the same reg classes. 12029 case 'd': 12030 case 'f': 12031 if (VT == MVT::f32 || VT == MVT::i32) 12032 return std::make_pair(0U, &PPC::F4RCRegClass); 12033 if (VT == MVT::f64 || VT == MVT::i64) 12034 return std::make_pair(0U, &PPC::F8RCRegClass); 12035 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 12036 return std::make_pair(0U, &PPC::QFRCRegClass); 12037 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 12038 return std::make_pair(0U, &PPC::QSRCRegClass); 12039 break; 12040 case 'v': 12041 if (VT == MVT::v4f64 && Subtarget.hasQPX()) 12042 return std::make_pair(0U, &PPC::QFRCRegClass); 12043 if (VT == MVT::v4f32 && Subtarget.hasQPX()) 12044 return std::make_pair(0U, &PPC::QSRCRegClass); 12045 if (Subtarget.hasAltivec()) 12046 return std::make_pair(0U, &PPC::VRRCRegClass); 12047 case 'y': // crrc 12048 return std::make_pair(0U, &PPC::CRRCRegClass); 12049 } 12050 } else if (Constraint == "wc" && Subtarget.useCRBits()) { 12051 // An individual CR bit. 
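// (Only offered when CR bits are modeled as individual i1 values; otherwise
// the constraint falls through to the generic handling further down.)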
12052 return std::make_pair(0U, &PPC::CRBITRCRegClass); 12053 } else if ((Constraint == "wa" || Constraint == "wd" || 12054 Constraint == "wf") && Subtarget.hasVSX()) { 12055 return std::make_pair(0U, &PPC::VSRCRegClass); 12056 } else if (Constraint == "ws" && Subtarget.hasVSX()) { 12057 if (VT == MVT::f32 && Subtarget.hasP8Vector()) 12058 return std::make_pair(0U, &PPC::VSSRCRegClass); 12059 else 12060 return std::make_pair(0U, &PPC::VSFRCRegClass); 12061 } 12062 12063 std::pair<unsigned, const TargetRegisterClass *> R = 12064 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 12065 12066 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 12067 // (which we call X[0-9]+). If a 64-bit value has been requested, and a 12068 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 12069 // register. 12070 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 12071 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 12072 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && 12073 PPC::GPRCRegClass.contains(R.first)) 12074 return std::make_pair(TRI->getMatchingSuperReg(R.first, 12075 PPC::sub_32, &PPC::G8RCRegClass), 12076 &PPC::G8RCRegClass); 12077 12078 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 12079 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { 12080 R.first = PPC::CR0; 12081 R.second = &PPC::CRRCRegClass; 12082 } 12083 12084 return R; 12085 } 12086 12087 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 12088 /// vector. If it is invalid, don't add anything to Ops. 12089 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 12090 std::string &Constraint, 12091 std::vector<SDValue>&Ops, 12092 SelectionDAG &DAG) const { 12093 SDValue Result; 12094 12095 // Only support length 1 constraints. 12096 if (Constraint.length() > 1) return; 12097 12098 char Letter = Constraint[0]; 12099 switch (Letter) { 12100 default: break; 12101 case 'I': 12102 case 'J': 12103 case 'K': 12104 case 'L': 12105 case 'M': 12106 case 'N': 12107 case 'O': 12108 case 'P': { 12109 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 12110 if (!CST) return; // Must be an immediate to match. 12111 SDLoc dl(Op); 12112 int64_t Value = CST->getSExtValue(); 12113 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative 12114 // numbers are printed as such. 12115 switch (Letter) { 12116 default: llvm_unreachable("Unknown constraint letter!"); 12117 case 'I': // "I" is a signed 16-bit constant. 12118 if (isInt<16>(Value)) 12119 Result = DAG.getTargetConstant(Value, dl, TCVT); 12120 break; 12121 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 12122 if (isShiftedUInt<16, 16>(Value)) 12123 Result = DAG.getTargetConstant(Value, dl, TCVT); 12124 break; 12125 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 12126 if (isShiftedInt<16, 16>(Value)) 12127 Result = DAG.getTargetConstant(Value, dl, TCVT); 12128 break; 12129 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 12130 if (isUInt<16>(Value)) 12131 Result = DAG.getTargetConstant(Value, dl, TCVT); 12132 break; 12133 case 'M': // "M" is a constant that is greater than 31. 12134 if (Value > 31) 12135 Result = DAG.getTargetConstant(Value, dl, TCVT); 12136 break; 12137 case 'N': // "N" is a positive constant that is an exact power of two. 
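// (The explicit Value > 0 check guards against negative immediates whose
// unsigned bit pattern happens to be a power of two.)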
12138 if (Value > 0 && isPowerOf2_64(Value)) 12139 Result = DAG.getTargetConstant(Value, dl, TCVT); 12140 break; 12141 case 'O': // "O" is the constant zero. 12142 if (Value == 0) 12143 Result = DAG.getTargetConstant(Value, dl, TCVT); 12144 break; 12145 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 12146 if (isInt<16>(-Value)) 12147 Result = DAG.getTargetConstant(Value, dl, TCVT); 12148 break; 12149 } 12150 break; 12151 } 12152 } 12153 12154 if (Result.getNode()) { 12155 Ops.push_back(Result); 12156 return; 12157 } 12158 12159 // Handle standard constraint letters. 12160 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 12161 } 12162 12163 // isLegalAddressingMode - Return true if the addressing mode represented 12164 // by AM is legal for this target, for a load/store of the specified type. 12165 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, 12166 const AddrMode &AM, Type *Ty, 12167 unsigned AS) const { 12168 // PPC does not allow r+i addressing modes for vectors! 12169 if (Ty->isVectorTy() && AM.BaseOffs != 0) 12170 return false; 12171 12172 // PPC allows a sign-extended 16-bit immediate field. 12173 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 12174 return false; 12175 12176 // No global is ever allowed as a base. 12177 if (AM.BaseGV) 12178 return false; 12179 12180 // PPC only support r+r, 12181 switch (AM.Scale) { 12182 case 0: // "r+i" or just "i", depending on HasBaseReg. 12183 break; 12184 case 1: 12185 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 12186 return false; 12187 // Otherwise we have r+r or r+i. 12188 break; 12189 case 2: 12190 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 12191 return false; 12192 // Allow 2*r as r+r. 12193 break; 12194 default: 12195 // No other scales are supported. 12196 return false; 12197 } 12198 12199 return true; 12200 } 12201 12202 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 12203 SelectionDAG &DAG) const { 12204 MachineFunction &MF = DAG.getMachineFunction(); 12205 MachineFrameInfo &MFI = MF.getFrameInfo(); 12206 MFI.setReturnAddressIsTaken(true); 12207 12208 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 12209 return SDValue(); 12210 12211 SDLoc dl(Op); 12212 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 12213 12214 // Make sure the function does not optimize away the store of the RA to 12215 // the stack. 12216 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 12217 FuncInfo->setLRStoreRequired(); 12218 bool isPPC64 = Subtarget.isPPC64(); 12219 auto PtrVT = getPointerTy(MF.getDataLayout()); 12220 12221 if (Depth > 0) { 12222 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 12223 SDValue Offset = 12224 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, 12225 isPPC64 ? MVT::i64 : MVT::i32); 12226 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 12227 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), 12228 MachinePointerInfo()); 12229 } 12230 12231 // Just load the return address off the stack. 
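// (For Depth == 0, getReturnAddrFrameIndex returns a frame index for the LR
// save slot, which the setLRStoreRequired call above keeps from being
// optimized away.)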
12232 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 12233 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, 12234 MachinePointerInfo()); 12235 } 12236 12237 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 12238 SelectionDAG &DAG) const { 12239 SDLoc dl(Op); 12240 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 12241 12242 MachineFunction &MF = DAG.getMachineFunction(); 12243 MachineFrameInfo &MFI = MF.getFrameInfo(); 12244 MFI.setFrameAddressIsTaken(true); 12245 12246 EVT PtrVT = getPointerTy(MF.getDataLayout()); 12247 bool isPPC64 = PtrVT == MVT::i64; 12248 12249 // Naked functions never have a frame pointer, and so we use r1. For all 12250 // other functions, this decision must be delayed until during PEI. 12251 unsigned FrameReg; 12252 if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) 12253 FrameReg = isPPC64 ? PPC::X1 : PPC::R1; 12254 else 12255 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; 12256 12257 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 12258 PtrVT); 12259 while (Depth--) 12260 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 12261 FrameAddr, MachinePointerInfo()); 12262 return FrameAddr; 12263 } 12264 12265 // FIXME? Maybe this could be a TableGen attribute on some registers and 12266 // this table could be generated automatically from RegInfo. 12267 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, 12268 SelectionDAG &DAG) const { 12269 bool isPPC64 = Subtarget.isPPC64(); 12270 bool isDarwinABI = Subtarget.isDarwinABI(); 12271 12272 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || 12273 (!isPPC64 && VT != MVT::i32)) 12274 report_fatal_error("Invalid register global variable type"); 12275 12276 bool is64Bit = isPPC64 && VT == MVT::i64; 12277 unsigned Reg = StringSwitch<unsigned>(RegName) 12278 .Case("r1", is64Bit ? PPC::X1 : PPC::R1) 12279 .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) 12280 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : 12281 (is64Bit ? PPC::X13 : PPC::R13)) 12282 .Default(0); 12283 12284 if (Reg) 12285 return Reg; 12286 report_fatal_error("Invalid register name global variable"); 12287 } 12288 12289 bool 12290 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 12291 // The PowerPC target isn't yet aware of offsets. 
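// (Returning false keeps a global address and any constant offset as separate
// nodes rather than folding the offset into the target global-address node.)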
12292 return false; 12293 } 12294 12295 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 12296 const CallInst &I, 12297 unsigned Intrinsic) const { 12298 12299 switch (Intrinsic) { 12300 case Intrinsic::ppc_qpx_qvlfd: 12301 case Intrinsic::ppc_qpx_qvlfs: 12302 case Intrinsic::ppc_qpx_qvlfcd: 12303 case Intrinsic::ppc_qpx_qvlfcs: 12304 case Intrinsic::ppc_qpx_qvlfiwa: 12305 case Intrinsic::ppc_qpx_qvlfiwz: 12306 case Intrinsic::ppc_altivec_lvx: 12307 case Intrinsic::ppc_altivec_lvxl: 12308 case Intrinsic::ppc_altivec_lvebx: 12309 case Intrinsic::ppc_altivec_lvehx: 12310 case Intrinsic::ppc_altivec_lvewx: 12311 case Intrinsic::ppc_vsx_lxvd2x: 12312 case Intrinsic::ppc_vsx_lxvw4x: { 12313 EVT VT; 12314 switch (Intrinsic) { 12315 case Intrinsic::ppc_altivec_lvebx: 12316 VT = MVT::i8; 12317 break; 12318 case Intrinsic::ppc_altivec_lvehx: 12319 VT = MVT::i16; 12320 break; 12321 case Intrinsic::ppc_altivec_lvewx: 12322 VT = MVT::i32; 12323 break; 12324 case Intrinsic::ppc_vsx_lxvd2x: 12325 VT = MVT::v2f64; 12326 break; 12327 case Intrinsic::ppc_qpx_qvlfd: 12328 VT = MVT::v4f64; 12329 break; 12330 case Intrinsic::ppc_qpx_qvlfs: 12331 VT = MVT::v4f32; 12332 break; 12333 case Intrinsic::ppc_qpx_qvlfcd: 12334 VT = MVT::v2f64; 12335 break; 12336 case Intrinsic::ppc_qpx_qvlfcs: 12337 VT = MVT::v2f32; 12338 break; 12339 default: 12340 VT = MVT::v4i32; 12341 break; 12342 } 12343 12344 Info.opc = ISD::INTRINSIC_W_CHAIN; 12345 Info.memVT = VT; 12346 Info.ptrVal = I.getArgOperand(0); 12347 Info.offset = -VT.getStoreSize()+1; 12348 Info.size = 2*VT.getStoreSize()-1; 12349 Info.align = 1; 12350 Info.vol = false; 12351 Info.readMem = true; 12352 Info.writeMem = false; 12353 return true; 12354 } 12355 case Intrinsic::ppc_qpx_qvlfda: 12356 case Intrinsic::ppc_qpx_qvlfsa: 12357 case Intrinsic::ppc_qpx_qvlfcda: 12358 case Intrinsic::ppc_qpx_qvlfcsa: 12359 case Intrinsic::ppc_qpx_qvlfiwaa: 12360 case Intrinsic::ppc_qpx_qvlfiwza: { 12361 EVT VT; 12362 switch (Intrinsic) { 12363 case Intrinsic::ppc_qpx_qvlfda: 12364 VT = MVT::v4f64; 12365 break; 12366 case Intrinsic::ppc_qpx_qvlfsa: 12367 VT = MVT::v4f32; 12368 break; 12369 case Intrinsic::ppc_qpx_qvlfcda: 12370 VT = MVT::v2f64; 12371 break; 12372 case Intrinsic::ppc_qpx_qvlfcsa: 12373 VT = MVT::v2f32; 12374 break; 12375 default: 12376 VT = MVT::v4i32; 12377 break; 12378 } 12379 12380 Info.opc = ISD::INTRINSIC_W_CHAIN; 12381 Info.memVT = VT; 12382 Info.ptrVal = I.getArgOperand(0); 12383 Info.offset = 0; 12384 Info.size = VT.getStoreSize(); 12385 Info.align = 1; 12386 Info.vol = false; 12387 Info.readMem = true; 12388 Info.writeMem = false; 12389 return true; 12390 } 12391 case Intrinsic::ppc_qpx_qvstfd: 12392 case Intrinsic::ppc_qpx_qvstfs: 12393 case Intrinsic::ppc_qpx_qvstfcd: 12394 case Intrinsic::ppc_qpx_qvstfcs: 12395 case Intrinsic::ppc_qpx_qvstfiw: 12396 case Intrinsic::ppc_altivec_stvx: 12397 case Intrinsic::ppc_altivec_stvxl: 12398 case Intrinsic::ppc_altivec_stvebx: 12399 case Intrinsic::ppc_altivec_stvehx: 12400 case Intrinsic::ppc_altivec_stvewx: 12401 case Intrinsic::ppc_vsx_stxvd2x: 12402 case Intrinsic::ppc_vsx_stxvw4x: { 12403 EVT VT; 12404 switch (Intrinsic) { 12405 case Intrinsic::ppc_altivec_stvebx: 12406 VT = MVT::i8; 12407 break; 12408 case Intrinsic::ppc_altivec_stvehx: 12409 VT = MVT::i16; 12410 break; 12411 case Intrinsic::ppc_altivec_stvewx: 12412 VT = MVT::i32; 12413 break; 12414 case Intrinsic::ppc_vsx_stxvd2x: 12415 VT = MVT::v2f64; 12416 break; 12417 case Intrinsic::ppc_qpx_qvstfd: 12418 VT = MVT::v4f64; 12419 break; 12420 
case Intrinsic::ppc_qpx_qvstfs: 12421 VT = MVT::v4f32; 12422 break; 12423 case Intrinsic::ppc_qpx_qvstfcd: 12424 VT = MVT::v2f64; 12425 break; 12426 case Intrinsic::ppc_qpx_qvstfcs: 12427 VT = MVT::v2f32; 12428 break; 12429 default: 12430 VT = MVT::v4i32; 12431 break; 12432 } 12433 12434 Info.opc = ISD::INTRINSIC_VOID; 12435 Info.memVT = VT; 12436 Info.ptrVal = I.getArgOperand(1); 12437 Info.offset = -VT.getStoreSize()+1; 12438 Info.size = 2*VT.getStoreSize()-1; 12439 Info.align = 1; 12440 Info.vol = false; 12441 Info.readMem = false; 12442 Info.writeMem = true; 12443 return true; 12444 } 12445 case Intrinsic::ppc_qpx_qvstfda: 12446 case Intrinsic::ppc_qpx_qvstfsa: 12447 case Intrinsic::ppc_qpx_qvstfcda: 12448 case Intrinsic::ppc_qpx_qvstfcsa: 12449 case Intrinsic::ppc_qpx_qvstfiwa: { 12450 EVT VT; 12451 switch (Intrinsic) { 12452 case Intrinsic::ppc_qpx_qvstfda: 12453 VT = MVT::v4f64; 12454 break; 12455 case Intrinsic::ppc_qpx_qvstfsa: 12456 VT = MVT::v4f32; 12457 break; 12458 case Intrinsic::ppc_qpx_qvstfcda: 12459 VT = MVT::v2f64; 12460 break; 12461 case Intrinsic::ppc_qpx_qvstfcsa: 12462 VT = MVT::v2f32; 12463 break; 12464 default: 12465 VT = MVT::v4i32; 12466 break; 12467 } 12468 12469 Info.opc = ISD::INTRINSIC_VOID; 12470 Info.memVT = VT; 12471 Info.ptrVal = I.getArgOperand(1); 12472 Info.offset = 0; 12473 Info.size = VT.getStoreSize(); 12474 Info.align = 1; 12475 Info.vol = false; 12476 Info.readMem = false; 12477 Info.writeMem = true; 12478 return true; 12479 } 12480 default: 12481 break; 12482 } 12483 12484 return false; 12485 } 12486 12487 /// getOptimalMemOpType - Returns the target specific optimal type for load 12488 /// and store operations as a result of memset, memcpy, and memmove 12489 /// lowering. If DstAlign is zero that means it's safe to destination 12490 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 12491 /// means there isn't a need to check it against alignment requirement, 12492 /// probably because the source does not need to be loaded. If 'IsMemset' is 12493 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 12494 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 12495 /// source is constant so it does not need to be loaded. 12496 /// It returns EVT::Other if the type should be determined using generic 12497 /// target-independent logic. 12498 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, 12499 unsigned DstAlign, unsigned SrcAlign, 12500 bool IsMemset, bool ZeroMemset, 12501 bool MemcpyStrSrc, 12502 MachineFunction &MF) const { 12503 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { 12504 const Function *F = MF.getFunction(); 12505 // When expanding a memset, require at least two QPX instructions to cover 12506 // the cost of loading the value to be stored from the constant pool. 12507 if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && 12508 (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && 12509 !F->hasFnAttribute(Attribute::NoImplicitFloat)) { 12510 return MVT::v4f64; 12511 } 12512 12513 // We should use Altivec/VSX loads and stores when available. For unaligned 12514 // addresses, unaligned VSX loads are only fast starting with the P8. 
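// Hence the check below: copies known to be 16-byte aligned only need Altivec,
// while a possibly-unaligned access additionally requires P8 vector support
// (or VSX in the memset case).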
12515 if (Subtarget.hasAltivec() && Size >= 16 && 12516 (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || 12517 ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) 12518 return MVT::v4i32; 12519 } 12520 12521 if (Subtarget.isPPC64()) { 12522 return MVT::i64; 12523 } 12524 12525 return MVT::i32; 12526 } 12527 12528 /// \brief Returns true if it is beneficial to convert a load of a constant 12529 /// to just the constant itself. 12530 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 12531 Type *Ty) const { 12532 assert(Ty->isIntegerTy()); 12533 12534 unsigned BitSize = Ty->getPrimitiveSizeInBits(); 12535 return !(BitSize == 0 || BitSize > 64); 12536 } 12537 12538 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { 12539 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 12540 return false; 12541 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 12542 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 12543 return NumBits1 == 64 && NumBits2 == 32; 12544 } 12545 12546 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { 12547 if (!VT1.isInteger() || !VT2.isInteger()) 12548 return false; 12549 unsigned NumBits1 = VT1.getSizeInBits(); 12550 unsigned NumBits2 = VT2.getSizeInBits(); 12551 return NumBits1 == 64 && NumBits2 == 32; 12552 } 12553 12554 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 12555 // Generally speaking, zexts are not free, but they are free when they can be 12556 // folded with other operations. 12557 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) { 12558 EVT MemVT = LD->getMemoryVT(); 12559 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || 12560 (Subtarget.isPPC64() && MemVT == MVT::i32)) && 12561 (LD->getExtensionType() == ISD::NON_EXTLOAD || 12562 LD->getExtensionType() == ISD::ZEXTLOAD)) 12563 return true; 12564 } 12565 12566 // FIXME: Add other cases... 12567 // - 32-bit shifts with a zext to i64 12568 // - zext after ctlz, bswap, etc. 12569 // - zext after and by a constant mask 12570 12571 return TargetLowering::isZExtFree(Val, VT2); 12572 } 12573 12574 bool PPCTargetLowering::isFPExtFree(EVT VT) const { 12575 assert(VT.isFloatingPoint()); 12576 return true; 12577 } 12578 12579 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 12580 return isInt<16>(Imm) || isUInt<16>(Imm); 12581 } 12582 12583 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { 12584 return isInt<16>(Imm) || isUInt<16>(Imm); 12585 } 12586 12587 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 12588 unsigned, 12589 unsigned, 12590 bool *Fast) const { 12591 if (DisablePPCUnaligned) 12592 return false; 12593 12594 // PowerPC supports unaligned memory access for simple non-vector types. 12595 // Although accessing unaligned addresses is not as efficient as accessing 12596 // aligned addresses, it is generally more efficient than manual expansion, 12597 // and generally only traps for software emulation when crossing page 12598 // boundaries. 
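// Vector types are the exception: only the 16-byte types that VSX handles are
// accepted below, and ppcf128 is always rejected.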
12599 12600 if (!VT.isSimple()) 12601 return false; 12602 12603 if (VT.getSimpleVT().isVector()) { 12604 if (Subtarget.hasVSX()) { 12605 if (VT != MVT::v2f64 && VT != MVT::v2i64 && 12606 VT != MVT::v4f32 && VT != MVT::v4i32) 12607 return false; 12608 } else { 12609 return false; 12610 } 12611 } 12612 12613 if (VT == MVT::ppcf128) 12614 return false; 12615 12616 if (Fast) 12617 *Fast = true; 12618 12619 return true; 12620 } 12621 12622 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 12623 VT = VT.getScalarType(); 12624 12625 if (!VT.isSimple()) 12626 return false; 12627 12628 switch (VT.getSimpleVT().SimpleTy) { 12629 case MVT::f32: 12630 case MVT::f64: 12631 return true; 12632 default: 12633 break; 12634 } 12635 12636 return false; 12637 } 12638 12639 const MCPhysReg * 12640 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { 12641 // LR is a callee-save register, but we must treat it as clobbered by any call 12642 // site. Hence we include LR in the scratch registers, which are in turn added 12643 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies 12644 // to CTR, which is used by any indirect call. 12645 static const MCPhysReg ScratchRegs[] = { 12646 PPC::X12, PPC::LR8, PPC::CTR8, 0 12647 }; 12648 12649 return ScratchRegs; 12650 } 12651 12652 unsigned PPCTargetLowering::getExceptionPointerRegister( 12653 const Constant *PersonalityFn) const { 12654 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; 12655 } 12656 12657 unsigned PPCTargetLowering::getExceptionSelectorRegister( 12658 const Constant *PersonalityFn) const { 12659 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; 12660 } 12661 12662 bool 12663 PPCTargetLowering::shouldExpandBuildVectorWithShuffles( 12664 EVT VT , unsigned DefinedValues) const { 12665 if (VT == MVT::v2i64) 12666 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves 12667 12668 if (Subtarget.hasVSX() || Subtarget.hasQPX()) 12669 return true; 12670 12671 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); 12672 } 12673 12674 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 12675 if (DisableILPPref || Subtarget.enableMachineScheduler()) 12676 return TargetLowering::getSchedulingPreference(N); 12677 12678 return Sched::ILP; 12679 } 12680 12681 // Create a fast isel object. 
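// (Note: PPC::createFastISel is expected to return a FastISel object only for
// 64-bit ELF subtargets and nullptr otherwise, in which case the normal
// SelectionDAG path is used.)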
12682 FastISel * 12683 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, 12684 const TargetLibraryInfo *LibInfo) const { 12685 return PPC::createFastISel(FuncInfo, LibInfo); 12686 } 12687 12688 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { 12689 if (Subtarget.isDarwinABI()) return; 12690 if (!Subtarget.isPPC64()) return; 12691 12692 // Update IsSplitCSR in PPCFunctionInfo 12693 PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>(); 12694 PFI->setIsSplitCSR(true); 12695 } 12696 12697 void PPCTargetLowering::insertCopiesSplitCSR( 12698 MachineBasicBlock *Entry, 12699 const SmallVectorImpl<MachineBasicBlock *> &Exits) const { 12700 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); 12701 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); 12702 if (!IStart) 12703 return; 12704 12705 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 12706 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); 12707 MachineBasicBlock::iterator MBBI = Entry->begin(); 12708 for (const MCPhysReg *I = IStart; *I; ++I) { 12709 const TargetRegisterClass *RC = nullptr; 12710 if (PPC::G8RCRegClass.contains(*I)) 12711 RC = &PPC::G8RCRegClass; 12712 else if (PPC::F8RCRegClass.contains(*I)) 12713 RC = &PPC::F8RCRegClass; 12714 else if (PPC::CRRCRegClass.contains(*I)) 12715 RC = &PPC::CRRCRegClass; 12716 else if (PPC::VRRCRegClass.contains(*I)) 12717 RC = &PPC::VRRCRegClass; 12718 else 12719 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 12720 12721 unsigned NewVR = MRI->createVirtualRegister(RC); 12722 // Create copy from CSR to a virtual register. 12723 // FIXME: this currently does not emit CFI pseudo-instructions, it works 12724 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be 12725 // nounwind. If we want to generalize this later, we may need to emit 12726 // CFI pseudo-instructions. 12727 assert(Entry->getParent()->getFunction()->hasFnAttribute( 12728 Attribute::NoUnwind) && 12729 "Function should be nounwind in insertCopiesSplitCSR!"); 12730 Entry->addLiveIn(*I); 12731 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) 12732 .addReg(*I); 12733 12734 // Insert the copy-back instructions right before the terminator 12735 for (auto *Exit : Exits) 12736 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), 12737 TII->get(TargetOpcode::COPY), *I) 12738 .addReg(NewVR); 12739 } 12740 } 12741 12742 // Override to enable LOAD_STACK_GUARD lowering on Linux. 12743 bool PPCTargetLowering::useLoadStackGuardNode() const { 12744 if (!Subtarget.isTargetLinux()) 12745 return TargetLowering::useLoadStackGuardNode(); 12746 return true; 12747 } 12748 12749 // Override to disable global variable loading on Linux. 12750 void PPCTargetLowering::insertSSPDeclarations(Module &M) const { 12751 if (!Subtarget.isTargetLinux()) 12752 return TargetLowering::insertSSPDeclarations(M); 12753 } 12754 12755 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 12756 12757 if (!VT.isSimple() || !Subtarget.hasVSX()) 12758 return false; 12759 12760 switch(VT.getSimpleVT().SimpleTy) { 12761 default: 12762 // For FP types that are currently not supported by PPC backend, return 12763 // false. Examples: f16, f80. 12764 return false; 12765 case MVT::f32: 12766 case MVT::f64: 12767 case MVT::ppcf128: 12768 return Imm.isPosZero(); 12769 } 12770 } 12771