//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
  cl::desc("disable setting the node scheduling preference to ILP on PPC"),
  cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
  cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();

  if (TM.getSubtargetImpl()->isSVR4ABI())
    return new PPC64LinuxTargetObjectFile();

  return new TargetLoweringObjectFileELF();
}

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
  const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget->isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
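  // (These correspond to the update-form instructions, e.g. lwzu/stwu and
  // ldu/stdu, which write the effective address back into the base register.)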
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  if (Subtarget->useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget->hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
    setTruncStoreAction(MVT::i64, MVT::i1, Expand);
    setTruncStoreAction(MVT::i32, MVT::i1, Expand);
    setTruncStoreAction(MVT::i16, MVT::i1, Expand);
    setTruncStoreAction(MVT::i8, MVT::i1, Expand);

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
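  // (With these expanded as well, the legalizer typically lowers SREM/UREM to
  // an explicit divide, multiply and subtract sequence.)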
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget->hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget->hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget->hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget->hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget->hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget->useCRBits()) {
    // PowerPC does not have Select.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget->useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget->useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling; it is a light-weight setjmp/longjmp replacement
  // to support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented, so please don't build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget->isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget->isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget->has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
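  // (FPCVT here refers to the newer conversion instructions, e.g. fcfids,
  // fcfidu(s), fctiwuz and fctiduz, which cover the remaining unsigned and
  // single-precision cases.)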
  if (PPCSubTarget.hasFPCVT()) {
    if (Subtarget->has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget->use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget->hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
           j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
        MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
        setTruncStoreAction(VT, InnerVT, Expand);
      }
      setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget->useCRBits() ?
                       Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);

    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget->hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);

      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      addRegisterClass(MVT::f64, &PPC::VSRCRegClass);

      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
    }
  }

  if (Subtarget->has64BitSupport()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
  }

  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  // Altivec instructions set fields to all zeros or all ones.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget->useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget->useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget->isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget->useCRBits())
    setHasMultipleConditionRegisters();

  setMinFunctionAlignment(2);
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);

  if (isPPC64 && Subtarget->isJITCodeModel())
    // Temporary workaround for the inability of PPC64 JIT to handle jump
    // tables.
    setSupportJumpTables(false);

  setInsertFencesForAtomic(true);

  if (Subtarget->enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;

    setPrefFunctionAlignment(4);
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  // Darwin passes everything on a 4-byte boundary.
  if (PPCSubTarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary
  // on PPC32.
  unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
  if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
    getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ?
                     32 : 16);
  return Align;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::TOC_RESTORE:     return "PPCISD::TOC_RESTORE";
  case PPCISD::LOAD:            return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
  case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
  case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  }
}

EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return PPCSubTarget.useCRBits() ?
      MVT::i1 : MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  }
  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
        (Value & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match; zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5-bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}


/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64 0, i64* %b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  unsigned Align = MFI->getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(imm, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0".
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ?
                     PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// getPreIndexedAddressParts - Returns true by value, and sets the base
/// pointer, offset pointer and addressing mode by reference if the node's
/// address can be legally represented as a pre-indexed load/store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
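    // (The DS-form displacement is encoded in 14 bits and scaled by 4, which
    // is where the multiple-of-4 restriction comes from.)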
1372 if (Alignment < 4) 1373 return false; 1374 1375 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) 1376 return false; 1377 } 1378 1379 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1380 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 1381 // sext i32 to i64 when addr mode is r+i. 1382 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 1383 LD->getExtensionType() == ISD::SEXTLOAD && 1384 isa<ConstantSDNode>(Offset)) 1385 return false; 1386 } 1387 1388 AM = ISD::PRE_INC; 1389 return true; 1390 } 1391 1392 //===----------------------------------------------------------------------===// 1393 // LowerOperation implementation 1394 //===----------------------------------------------------------------------===// 1395 1396 /// GetLabelAccessInfo - Return true if we should reference labels using a 1397 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. 1398 static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, 1399 unsigned &LoOpFlags, const GlobalValue *GV = 0) { 1400 HiOpFlags = PPCII::MO_HA; 1401 LoOpFlags = PPCII::MO_LO; 1402 1403 // Don't use the pic base if not in PIC relocation model. Or if we are on a 1404 // non-darwin platform. We don't support PIC on other platforms yet. 1405 bool isPIC = TM.getRelocationModel() == Reloc::PIC_ && 1406 TM.getSubtarget<PPCSubtarget>().isDarwin(); 1407 if (isPIC) { 1408 HiOpFlags |= PPCII::MO_PIC_FLAG; 1409 LoOpFlags |= PPCII::MO_PIC_FLAG; 1410 } 1411 1412 // If this is a reference to a global value that requires a non-lazy-ptr, make 1413 // sure that instruction lowering adds it. 1414 if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) { 1415 HiOpFlags |= PPCII::MO_NLP_FLAG; 1416 LoOpFlags |= PPCII::MO_NLP_FLAG; 1417 1418 if (GV->hasHiddenVisibility()) { 1419 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1420 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1421 } 1422 } 1423 1424 return isPIC; 1425 } 1426 1427 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 1428 SelectionDAG &DAG) { 1429 EVT PtrVT = HiPart.getValueType(); 1430 SDValue Zero = DAG.getConstant(0, PtrVT); 1431 SDLoc DL(HiPart); 1432 1433 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 1434 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 1435 1436 // With PIC, the first instruction is actually "GR+hi(&G)". 1437 if (isPIC) 1438 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 1439 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 1440 1441 // Generate non-pic code that has direct accesses to the constant pool. 1442 // The address of the global is just (hi(&g)+lo(&g)). 1443 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 1444 } 1445 1446 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 1447 SelectionDAG &DAG) const { 1448 EVT PtrVT = Op.getValueType(); 1449 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1450 const Constant *C = CP->getConstVal(); 1451 1452 // 64-bit SVR4 ABI code is always position-independent. 1453 // The actual address of the GlobalValue is stored in the TOC. 
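  // (For instance, with the small code model the TOC_ENTRY node created
  //  below is eventually matched to a single TOC-relative load such as
  //    ld rD, .LCx@toc(r2)
  //  while the medium and large code models split it into an addis/ld pair
  //  off r2; the exact sequence is decided later, during instruction
  //  selection, not here.)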
1454 if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { 1455 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 1456 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA, 1457 DAG.getRegister(PPC::X2, MVT::i64)); 1458 } 1459 1460 unsigned MOHiFlag, MOLoFlag; 1461 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1462 SDValue CPIHi = 1463 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 1464 SDValue CPILo = 1465 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 1466 return LowerLabelRef(CPIHi, CPILo, isPIC, DAG); 1467 } 1468 1469 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 1470 EVT PtrVT = Op.getValueType(); 1471 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1472 1473 // 64-bit SVR4 ABI code is always position-independent. 1474 // The actual address of the GlobalValue is stored in the TOC. 1475 if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { 1476 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 1477 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, 1478 DAG.getRegister(PPC::X2, MVT::i64)); 1479 } 1480 1481 unsigned MOHiFlag, MOLoFlag; 1482 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1483 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 1484 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 1485 return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); 1486 } 1487 1488 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 1489 SelectionDAG &DAG) const { 1490 EVT PtrVT = Op.getValueType(); 1491 1492 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1493 1494 unsigned MOHiFlag, MOLoFlag; 1495 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1496 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 1497 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 1498 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); 1499 } 1500 1501 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 1502 SelectionDAG &DAG) const { 1503 1504 // FIXME: TLS addresses currently use medium model code sequences, 1505 // which is the most useful form. Eventually support for small and 1506 // large models could be added if users need it, at the cost of 1507 // additional complexity. 1508 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 1509 SDLoc dl(GA); 1510 const GlobalValue *GV = GA->getGlobal(); 1511 EVT PtrVT = getPointerTy(); 1512 bool is64bit = PPCSubTarget.isPPC64(); 1513 1514 TLSModel::Model Model = getTargetMachine().getTLSModel(GV); 1515 1516 if (Model == TLSModel::LocalExec) { 1517 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1518 PPCII::MO_TPREL_HA); 1519 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1520 PPCII::MO_TPREL_LO); 1521 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 1522 is64bit ? 
MVT::i64 : MVT::i32); 1523 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 1524 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 1525 } 1526 1527 if (Model == TLSModel::InitialExec) { 1528 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1529 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1530 PPCII::MO_TLS); 1531 SDValue GOTPtr; 1532 if (is64bit) { 1533 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1534 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 1535 PtrVT, GOTReg, TGA); 1536 } else 1537 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); 1538 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 1539 PtrVT, TGA, GOTPtr); 1540 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 1541 } 1542 1543 if (Model == TLSModel::GeneralDynamic) { 1544 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1545 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1546 SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 1547 GOTReg, TGA); 1548 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, 1549 GOTEntryHi, TGA); 1550 1551 // We need a chain node, and don't have one handy. The underlying 1552 // call has no side effects, so using the function entry node 1553 // suffices. 1554 SDValue Chain = DAG.getEntryNode(); 1555 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); 1556 SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); 1557 SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, 1558 PtrVT, ParmReg, TGA); 1559 // The return value from GET_TLS_ADDR really is in X3 already, but 1560 // some hacks are needed here to tie everything together. The extra 1561 // copies dissolve during subsequent transforms. 1562 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); 1563 return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); 1564 } 1565 1566 if (Model == TLSModel::LocalDynamic) { 1567 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1568 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1569 SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 1570 GOTReg, TGA); 1571 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, 1572 GOTEntryHi, TGA); 1573 1574 // We need a chain node, and don't have one handy. The underlying 1575 // call has no side effects, so using the function entry node 1576 // suffices. 1577 SDValue Chain = DAG.getEntryNode(); 1578 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); 1579 SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); 1580 SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, 1581 PtrVT, ParmReg, TGA); 1582 // The return value from GET_TLSLD_ADDR really is in X3 already, but 1583 // some hacks are needed here to tie everything together. The extra 1584 // copies dissolve during subsequent transforms. 1585 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); 1586 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, 1587 Chain, ParmReg, TGA); 1588 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 1589 } 1590 1591 llvm_unreachable("Unknown TLS model!"); 1592 } 1593 1594 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 1595 SelectionDAG &DAG) const { 1596 EVT PtrVT = Op.getValueType(); 1597 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 1598 SDLoc DL(GSDN); 1599 const GlobalValue *GV = GSDN->getGlobal(); 1600 1601 // 64-bit SVR4 ABI code is always position-independent. 1602 // The actual address of the GlobalValue is stored in the TOC. 
1603 if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
1604 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
1605 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
1606 DAG.getRegister(PPC::X2, MVT::i64));
1607 }
1608
1609 unsigned MOHiFlag, MOLoFlag;
1610 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
1611
1612 SDValue GAHi =
1613 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
1614 SDValue GALo =
1615 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
1616
1617 SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
1618
1619 // If the global reference is actually to a non-lazy-pointer, we have to do an
1620 // extra load to get the address of the global.
1621 if (MOHiFlag & PPCII::MO_NLP_FLAG)
1622 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
1623 false, false, false, 0);
1624 return Ptr;
1625 }
1626
1627 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1628 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1629 SDLoc dl(Op);
1630
1631 // If we're comparing for equality to zero, expose the fact that this is
1632 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
1633 // fold the new nodes.
1634 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1635 if (C->isNullValue() && CC == ISD::SETEQ) {
1636 EVT VT = Op.getOperand(0).getValueType();
1637 SDValue Zext = Op.getOperand(0);
1638 if (VT.bitsLT(MVT::i32)) {
1639 VT = MVT::i32;
1640 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
1641 }
1642 unsigned Log2b = Log2_32(VT.getSizeInBits());
1643 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
1644 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
1645 DAG.getConstant(Log2b, MVT::i32));
1646 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
1647 }
1648 // Leave comparisons against 0 and -1 alone for now, since they're usually
1649 // optimized. FIXME: revisit this when we can custom lower all setcc
1650 // optimizations.
1651 if (C->isAllOnesValue() || C->isNullValue())
1652 return SDValue();
1653 }
1654
1655 // If we have an integer seteq/setne, turn it into a compare against zero
1656 // by xor'ing the rhs with the lhs, which is faster than setting a
1657 // condition register, reading it back out, and masking the correct bit. The
1658 // normal approach here uses sub to do this instead of xor. Using xor exposes
1659 // the result to other bit-twiddling opportunities.
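  // (Combining the two transformations, an i32 equality test (a == b)
  //  typically ends up as an xor/cntlzw/srwi sequence, e.g.:
  //    xor    r5, r3, r4
  //    cntlzw r5, r5
  //    srwi   r3, r5, 5
  //  since cntlzw produces 32 only when its input is zero.)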
1660 EVT LHSVT = Op.getOperand(0).getValueType(); 1661 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 1662 EVT VT = Op.getValueType(); 1663 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 1664 Op.getOperand(1)); 1665 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); 1666 } 1667 return SDValue(); 1668 } 1669 1670 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, 1671 const PPCSubtarget &Subtarget) const { 1672 SDNode *Node = Op.getNode(); 1673 EVT VT = Node->getValueType(0); 1674 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1675 SDValue InChain = Node->getOperand(0); 1676 SDValue VAListPtr = Node->getOperand(1); 1677 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 1678 SDLoc dl(Node); 1679 1680 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 1681 1682 // gpr_index 1683 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1684 VAListPtr, MachinePointerInfo(SV), MVT::i8, 1685 false, false, 0); 1686 InChain = GprIndex.getValue(1); 1687 1688 if (VT == MVT::i64) { 1689 // Check if GprIndex is even 1690 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 1691 DAG.getConstant(1, MVT::i32)); 1692 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 1693 DAG.getConstant(0, MVT::i32), ISD::SETNE); 1694 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 1695 DAG.getConstant(1, MVT::i32)); 1696 // Align GprIndex to be even if it isn't 1697 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 1698 GprIndex); 1699 } 1700 1701 // fpr index is 1 byte after gpr 1702 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1703 DAG.getConstant(1, MVT::i32)); 1704 1705 // fpr 1706 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1707 FprPtr, MachinePointerInfo(SV), MVT::i8, 1708 false, false, 0); 1709 InChain = FprIndex.getValue(1); 1710 1711 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1712 DAG.getConstant(8, MVT::i32)); 1713 1714 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1715 DAG.getConstant(4, MVT::i32)); 1716 1717 // areas 1718 SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, 1719 MachinePointerInfo(), false, false, 1720 false, 0); 1721 InChain = OverflowArea.getValue(1); 1722 1723 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, 1724 MachinePointerInfo(), false, false, 1725 false, 0); 1726 InChain = RegSaveArea.getValue(1); 1727 1728 // select overflow_area if index > 8 1729 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 1730 DAG.getConstant(8, MVT::i32), ISD::SETLT); 1731 1732 // adjustment constant gpr_index * 4/8 1733 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 1734 VT.isInteger() ? GprIndex : FprIndex, 1735 DAG.getConstant(VT.isInteger() ? 4 : 8, 1736 MVT::i32)); 1737 1738 // OurReg = RegSaveArea + RegConstant 1739 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 1740 RegConstant); 1741 1742 // Floating types are 32 bytes into RegSaveArea 1743 if (VT.isFloatingPoint()) 1744 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 1745 DAG.getConstant(32, MVT::i32)); 1746 1747 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 1748 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 1749 VT.isInteger() ? GprIndex : FprIndex, 1750 DAG.getConstant(VT == MVT::i64 ? 
2 : 1, 1751 MVT::i32)); 1752 1753 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 1754 VT.isInteger() ? VAListPtr : FprPtr, 1755 MachinePointerInfo(SV), 1756 MVT::i8, false, false, 0); 1757 1758 // determine if we should load from reg_save_area or overflow_area 1759 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 1760 1761 // increase overflow_area by 4/8 if gpr/fpr > 8 1762 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 1763 DAG.getConstant(VT.isInteger() ? 4 : 8, 1764 MVT::i32)); 1765 1766 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 1767 OverflowAreaPlusN); 1768 1769 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, 1770 OverflowAreaPtr, 1771 MachinePointerInfo(), 1772 MVT::i32, false, false, 0); 1773 1774 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 1775 false, false, false, 0); 1776 } 1777 1778 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, 1779 const PPCSubtarget &Subtarget) const { 1780 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 1781 1782 // We have to copy the entire va_list struct: 1783 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 1784 return DAG.getMemcpy(Op.getOperand(0), Op, 1785 Op.getOperand(1), Op.getOperand(2), 1786 DAG.getConstant(12, MVT::i32), 8, false, true, 1787 MachinePointerInfo(), MachinePointerInfo()); 1788 } 1789 1790 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 1791 SelectionDAG &DAG) const { 1792 return Op.getOperand(0); 1793 } 1794 1795 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 1796 SelectionDAG &DAG) const { 1797 SDValue Chain = Op.getOperand(0); 1798 SDValue Trmp = Op.getOperand(1); // trampoline 1799 SDValue FPtr = Op.getOperand(2); // nested function 1800 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 1801 SDLoc dl(Op); 1802 1803 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1804 bool isPPC64 = (PtrVT == MVT::i64); 1805 Type *IntPtrTy = 1806 DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( 1807 *DAG.getContext()); 1808 1809 TargetLowering::ArgListTy Args; 1810 TargetLowering::ArgListEntry Entry; 1811 1812 Entry.Ty = IntPtrTy; 1813 Entry.Node = Trmp; Args.push_back(Entry); 1814 1815 // TrampSize == (isPPC64 ? 48 : 40); 1816 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, 1817 isPPC64 ? MVT::i64 : MVT::i32); 1818 Args.push_back(Entry); 1819 1820 Entry.Node = FPtr; Args.push_back(Entry); 1821 Entry.Node = Nest; Args.push_back(Entry); 1822 1823 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 1824 TargetLowering::CallLoweringInfo CLI(Chain, 1825 Type::getVoidTy(*DAG.getContext()), 1826 false, false, false, false, 0, 1827 CallingConv::C, 1828 /*isTailCall=*/false, 1829 /*doesNotRet=*/false, 1830 /*isReturnValueUsed=*/true, 1831 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 1832 Args, DAG, dl); 1833 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 1834 1835 return CallResult.second; 1836 } 1837 1838 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, 1839 const PPCSubtarget &Subtarget) const { 1840 MachineFunction &MF = DAG.getMachineFunction(); 1841 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1842 1843 SDLoc dl(Op); 1844 1845 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 1846 // vastart just stores the address of the VarArgsFrameIndex slot into the 1847 // memory location argument. 
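    // (This matches the pointer-style va_list used by the 64-bit and Darwin
    //  ABIs; the 32-bit SVR4 path below instead fills in the four fields of
    //  the va_list struct that LowerVAARG above reads back.)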
1848 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1849 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1850 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1851 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 1852 MachinePointerInfo(SV), 1853 false, false, 0); 1854 } 1855 1856 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 1857 // We suppose the given va_list is already allocated. 1858 // 1859 // typedef struct { 1860 // char gpr; /* index into the array of 8 GPRs 1861 // * stored in the register save area 1862 // * gpr=0 corresponds to r3, 1863 // * gpr=1 to r4, etc. 1864 // */ 1865 // char fpr; /* index into the array of 8 FPRs 1866 // * stored in the register save area 1867 // * fpr=0 corresponds to f1, 1868 // * fpr=1 to f2, etc. 1869 // */ 1870 // char *overflow_arg_area; 1871 // /* location on stack that holds 1872 // * the next overflow argument 1873 // */ 1874 // char *reg_save_area; 1875 // /* where r3:r10 and f1:f8 (if saved) 1876 // * are stored 1877 // */ 1878 // } va_list[1]; 1879 1880 1881 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); 1882 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); 1883 1884 1885 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1886 1887 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 1888 PtrVT); 1889 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1890 PtrVT); 1891 1892 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 1893 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); 1894 1895 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 1896 SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); 1897 1898 uint64_t FPROffset = 1; 1899 SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); 1900 1901 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1902 1903 // Store first byte : number of int regs 1904 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, 1905 Op.getOperand(1), 1906 MachinePointerInfo(SV), 1907 MVT::i8, false, false, 0); 1908 uint64_t nextOffset = FPROffset; 1909 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 1910 ConstFPROffset); 1911 1912 // Store second byte : number of float regs 1913 SDValue secondStore = 1914 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 1915 MachinePointerInfo(SV, nextOffset), MVT::i8, 1916 false, false, 0); 1917 nextOffset += StackOffset; 1918 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 1919 1920 // Store second word : arguments given on stack 1921 SDValue thirdStore = 1922 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 1923 MachinePointerInfo(SV, nextOffset), 1924 false, false, 0); 1925 nextOffset += FrameOffset; 1926 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 1927 1928 // Store third word : arguments given in registers 1929 return DAG.getStore(thirdStore, dl, FR, nextPtr, 1930 MachinePointerInfo(SV, nextOffset), 1931 false, false, 0); 1932 1933 } 1934 1935 #include "PPCGenCallingConv.inc" 1936 1937 // Function whose sole purpose is to kill compiler warnings 1938 // stemming from unused functions included from PPCGenCallingConv.inc. 1939 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { 1940 return Flag ? 
CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; 1941 } 1942 1943 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 1944 CCValAssign::LocInfo &LocInfo, 1945 ISD::ArgFlagsTy &ArgFlags, 1946 CCState &State) { 1947 return true; 1948 } 1949 1950 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 1951 MVT &LocVT, 1952 CCValAssign::LocInfo &LocInfo, 1953 ISD::ArgFlagsTy &ArgFlags, 1954 CCState &State) { 1955 static const uint16_t ArgRegs[] = { 1956 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1957 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1958 }; 1959 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1960 1961 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1962 1963 // Skip one register if the first unallocated register has an even register 1964 // number and there are still argument registers available which have not been 1965 // allocated yet. RegNum is actually an index into ArgRegs, which means we 1966 // need to skip a register if RegNum is odd. 1967 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 1968 State.AllocateReg(ArgRegs[RegNum]); 1969 } 1970 1971 // Always return false here, as this function only makes sure that the first 1972 // unallocated register has an odd register number and does not actually 1973 // allocate a register for the current argument. 1974 return false; 1975 } 1976 1977 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 1978 MVT &LocVT, 1979 CCValAssign::LocInfo &LocInfo, 1980 ISD::ArgFlagsTy &ArgFlags, 1981 CCState &State) { 1982 static const uint16_t ArgRegs[] = { 1983 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1984 PPC::F8 1985 }; 1986 1987 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1988 1989 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1990 1991 // If there is only one Floating-point register left we need to put both f64 1992 // values of a split ppc_fp128 value on the stack. 1993 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 1994 State.AllocateReg(ArgRegs[RegNum]); 1995 } 1996 1997 // Always return false here, as this function only makes sure that the two f64 1998 // values a ppc_fp128 value is split into are both passed in registers or both 1999 // passed on the stack and does not actually allocate a register for the 2000 // current argument. 2001 return false; 2002 } 2003 2004 /// GetFPR - Get the set of FP registers that should be allocated for arguments, 2005 /// on Darwin. 2006 static const uint16_t *GetFPR() { 2007 static const uint16_t FPR[] = { 2008 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2009 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 2010 }; 2011 2012 return FPR; 2013 } 2014 2015 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 2016 /// the stack. 
2017 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2018 unsigned PtrByteSize) { 2019 unsigned ArgSize = ArgVT.getStoreSize(); 2020 if (Flags.isByVal()) 2021 ArgSize = Flags.getByValSize(); 2022 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2023 2024 return ArgSize; 2025 } 2026 2027 SDValue 2028 PPCTargetLowering::LowerFormalArguments(SDValue Chain, 2029 CallingConv::ID CallConv, bool isVarArg, 2030 const SmallVectorImpl<ISD::InputArg> 2031 &Ins, 2032 SDLoc dl, SelectionDAG &DAG, 2033 SmallVectorImpl<SDValue> &InVals) 2034 const { 2035 if (PPCSubTarget.isSVR4ABI()) { 2036 if (PPCSubTarget.isPPC64()) 2037 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 2038 dl, DAG, InVals); 2039 else 2040 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 2041 dl, DAG, InVals); 2042 } else { 2043 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 2044 dl, DAG, InVals); 2045 } 2046 } 2047 2048 SDValue 2049 PPCTargetLowering::LowerFormalArguments_32SVR4( 2050 SDValue Chain, 2051 CallingConv::ID CallConv, bool isVarArg, 2052 const SmallVectorImpl<ISD::InputArg> 2053 &Ins, 2054 SDLoc dl, SelectionDAG &DAG, 2055 SmallVectorImpl<SDValue> &InVals) const { 2056 2057 // 32-bit SVR4 ABI Stack Frame Layout: 2058 // +-----------------------------------+ 2059 // +--> | Back chain | 2060 // | +-----------------------------------+ 2061 // | | Floating-point register save area | 2062 // | +-----------------------------------+ 2063 // | | General register save area | 2064 // | +-----------------------------------+ 2065 // | | CR save word | 2066 // | +-----------------------------------+ 2067 // | | VRSAVE save word | 2068 // | +-----------------------------------+ 2069 // | | Alignment padding | 2070 // | +-----------------------------------+ 2071 // | | Vector register save area | 2072 // | +-----------------------------------+ 2073 // | | Local variable space | 2074 // | +-----------------------------------+ 2075 // | | Parameter list area | 2076 // | +-----------------------------------+ 2077 // | | LR save word | 2078 // | +-----------------------------------+ 2079 // SP--> +--- | Back chain | 2080 // +-----------------------------------+ 2081 // 2082 // Specifications: 2083 // System V Application Binary Interface PowerPC Processor Supplement 2084 // AltiVec Technology Programming Interface Manual 2085 2086 MachineFunction &MF = DAG.getMachineFunction(); 2087 MachineFrameInfo *MFI = MF.getFrameInfo(); 2088 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2089 2090 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2091 // Potential tail calls could cause overwriting of argument stack slots. 2092 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2093 (CallConv == CallingConv::Fast)); 2094 unsigned PtrByteSize = 4; 2095 2096 // Assign locations to all of the incoming arguments. 2097 SmallVector<CCValAssign, 16> ArgLocs; 2098 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2099 getTargetMachine(), ArgLocs, *DAG.getContext()); 2100 2101 // Reserve space for the linkage area on the stack. 2102 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); 2103 2104 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 2105 2106 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2107 CCValAssign &VA = ArgLocs[i]; 2108 2109 // Arguments stored in registers. 
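    // (For the 32-bit SVR4 ABI these are r3-r10 for integers, f1-f8 for
    //  floating point and v2-v13 for vectors, matching the register lists
    //  used elsewhere in this file.)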
2110 if (VA.isRegLoc()) { 2111 const TargetRegisterClass *RC; 2112 EVT ValVT = VA.getValVT(); 2113 2114 switch (ValVT.getSimpleVT().SimpleTy) { 2115 default: 2116 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 2117 case MVT::i1: 2118 case MVT::i32: 2119 RC = &PPC::GPRCRegClass; 2120 break; 2121 case MVT::f32: 2122 RC = &PPC::F4RCRegClass; 2123 break; 2124 case MVT::f64: 2125 RC = &PPC::F8RCRegClass; 2126 break; 2127 case MVT::v16i8: 2128 case MVT::v8i16: 2129 case MVT::v4i32: 2130 case MVT::v4f32: 2131 case MVT::v2f64: 2132 RC = &PPC::VRRCRegClass; 2133 break; 2134 } 2135 2136 // Transform the arguments stored in physical registers into virtual ones. 2137 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2138 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, 2139 ValVT == MVT::i1 ? MVT::i32 : ValVT); 2140 2141 if (ValVT == MVT::i1) 2142 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); 2143 2144 InVals.push_back(ArgValue); 2145 } else { 2146 // Argument stored in memory. 2147 assert(VA.isMemLoc()); 2148 2149 unsigned ArgSize = VA.getLocVT().getStoreSize(); 2150 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 2151 isImmutable); 2152 2153 // Create load nodes to retrieve arguments from the stack. 2154 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2155 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2156 MachinePointerInfo(), 2157 false, false, false, 0)); 2158 } 2159 } 2160 2161 // Assign locations to all of the incoming aggregate by value arguments. 2162 // Aggregates passed by value are stored in the local variable space of the 2163 // caller's stack frame, right above the parameter list area. 2164 SmallVector<CCValAssign, 16> ByValArgLocs; 2165 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2166 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 2167 2168 // Reserve stack space for the allocations in CCInfo. 2169 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 2170 2171 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 2172 2173 // Area that is at least reserved in the caller of this function. 2174 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 2175 2176 // Set the size that is at least reserved in caller of this function. Tail 2177 // call optimized function's reserved stack space needs to be aligned so that 2178 // taking the difference between two stack areas will result in an aligned 2179 // stack. 2180 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2181 2182 MinReservedArea = 2183 std::max(MinReservedArea, 2184 PPCFrameLowering::getMinCallFrameSize(false, false)); 2185 2186 unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> 2187 getStackAlignment(); 2188 unsigned AlignMask = TargetAlign-1; 2189 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 2190 2191 FI->setMinReservedArea(MinReservedArea); 2192 2193 SmallVector<SDValue, 8> MemOps; 2194 2195 // If the function takes variable number of arguments, make a frame index for 2196 // the start of the first vararg value... for expansion of llvm.va_start. 
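  // The two frame indices created below (the overflow-area offset and the
  // register save area) are exactly the values that LowerVASTART stores into
  // the va_list's overflow_arg_area and reg_save_area fields.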
2197 if (isVarArg) { 2198 static const uint16_t GPArgRegs[] = { 2199 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2200 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2201 }; 2202 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 2203 2204 static const uint16_t FPArgRegs[] = { 2205 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2206 PPC::F8 2207 }; 2208 const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 2209 2210 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, 2211 NumGPArgRegs)); 2212 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, 2213 NumFPArgRegs)); 2214 2215 // Make room for NumGPArgRegs and NumFPArgRegs. 2216 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 2217 NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; 2218 2219 FuncInfo->setVarArgsStackOffset( 2220 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2221 CCInfo.getNextStackOffset(), true)); 2222 2223 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 2224 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2225 2226 // The fixed integer arguments of a variadic function are stored to the 2227 // VarArgsFrameIndex on the stack so that they may be loaded by deferencing 2228 // the result of va_next. 2229 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 2230 // Get an existing live-in vreg, or add a new one. 2231 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 2232 if (!VReg) 2233 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 2234 2235 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2236 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2237 MachinePointerInfo(), false, false, 0); 2238 MemOps.push_back(Store); 2239 // Increment the address by four for the next argument to store 2240 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2241 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2242 } 2243 2244 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 2245 // is set. 2246 // The double arguments are stored to the VarArgsFrameIndex 2247 // on the stack. 2248 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 2249 // Get an existing live-in vreg, or add a new one. 2250 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 2251 if (!VReg) 2252 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 2253 2254 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 2255 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2256 MachinePointerInfo(), false, false, 0); 2257 MemOps.push_back(Store); 2258 // Increment the address by eight for the next argument to store 2259 SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, 2260 PtrVT); 2261 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2262 } 2263 } 2264 2265 if (!MemOps.empty()) 2266 Chain = DAG.getNode(ISD::TokenFactor, dl, 2267 MVT::Other, &MemOps[0], MemOps.size()); 2268 2269 return Chain; 2270 } 2271 2272 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2273 // value to MVT::i64 and then truncate to the correct register size. 
2274 SDValue 2275 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, 2276 SelectionDAG &DAG, SDValue ArgVal, 2277 SDLoc dl) const { 2278 if (Flags.isSExt()) 2279 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 2280 DAG.getValueType(ObjectVT)); 2281 else if (Flags.isZExt()) 2282 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 2283 DAG.getValueType(ObjectVT)); 2284 2285 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); 2286 } 2287 2288 // Set the size that is at least reserved in caller of this function. Tail 2289 // call optimized functions' reserved stack space needs to be aligned so that 2290 // taking the difference between two stack areas will result in an aligned 2291 // stack. 2292 void 2293 PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, 2294 unsigned nAltivecParamsAtEnd, 2295 unsigned MinReservedArea, 2296 bool isPPC64) const { 2297 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2298 // Add the Altivec parameters at the end, if needed. 2299 if (nAltivecParamsAtEnd) { 2300 MinReservedArea = ((MinReservedArea+15)/16)*16; 2301 MinReservedArea += 16*nAltivecParamsAtEnd; 2302 } 2303 MinReservedArea = 2304 std::max(MinReservedArea, 2305 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2306 unsigned TargetAlign 2307 = DAG.getMachineFunction().getTarget().getFrameLowering()-> 2308 getStackAlignment(); 2309 unsigned AlignMask = TargetAlign-1; 2310 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 2311 FI->setMinReservedArea(MinReservedArea); 2312 } 2313 2314 SDValue 2315 PPCTargetLowering::LowerFormalArguments_64SVR4( 2316 SDValue Chain, 2317 CallingConv::ID CallConv, bool isVarArg, 2318 const SmallVectorImpl<ISD::InputArg> 2319 &Ins, 2320 SDLoc dl, SelectionDAG &DAG, 2321 SmallVectorImpl<SDValue> &InVals) const { 2322 // TODO: add description of PPC stack frame format, or at least some docs. 2323 // 2324 MachineFunction &MF = DAG.getMachineFunction(); 2325 MachineFrameInfo *MFI = MF.getFrameInfo(); 2326 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2327 2328 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2329 // Potential tail calls could cause overwriting of argument stack slots. 2330 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2331 (CallConv == CallingConv::Fast)); 2332 unsigned PtrByteSize = 8; 2333 2334 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); 2335 // Area that is at least reserved in caller of this function. 2336 unsigned MinReservedArea = ArgOffset; 2337 2338 static const uint16_t GPR[] = { 2339 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2340 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2341 }; 2342 2343 static const uint16_t *FPR = GetFPR(); 2344 2345 static const uint16_t VR[] = { 2346 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2347 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2348 }; 2349 2350 const unsigned Num_GPR_Regs = array_lengthof(GPR); 2351 const unsigned Num_FPR_Regs = 13; 2352 const unsigned Num_VR_Regs = array_lengthof(VR); 2353 2354 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2355 2356 // Add DAG nodes to load the arguments or copy them out of registers. On 2357 // entry to a function on PPC, the arguments start after the linkage area, 2358 // although the first ones are often in registers. 
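  // Under the 64-bit SVR4 ABI every argument, even one passed in a GPR or
  // FPR, owns an 8-byte slot in the caller's parameter save area, which
  // begins right after the linkage area; ArgOffset below walks those slots.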
2359 2360 SmallVector<SDValue, 8> MemOps; 2361 unsigned nAltivecParamsAtEnd = 0; 2362 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2363 unsigned CurArgIdx = 0; 2364 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2365 SDValue ArgVal; 2366 bool needsLoad = false; 2367 EVT ObjectVT = Ins[ArgNo].VT; 2368 unsigned ObjSize = ObjectVT.getStoreSize(); 2369 unsigned ArgSize = ObjSize; 2370 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2371 std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); 2372 CurArgIdx = Ins[ArgNo].OrigArgIndex; 2373 2374 unsigned CurArgOffset = ArgOffset; 2375 2376 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 2377 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 2378 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 || 2379 ObjectVT==MVT::v2f64) { 2380 if (isVarArg) { 2381 MinReservedArea = ((MinReservedArea+15)/16)*16; 2382 MinReservedArea += CalculateStackSlotSize(ObjectVT, 2383 Flags, 2384 PtrByteSize); 2385 } else 2386 nAltivecParamsAtEnd++; 2387 } else 2388 // Calculate min reserved area. 2389 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 2390 Flags, 2391 PtrByteSize); 2392 2393 // FIXME the codegen can be much improved in some cases. 2394 // We do not have to keep everything in memory. 2395 if (Flags.isByVal()) { 2396 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2397 ObjSize = Flags.getByValSize(); 2398 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2399 // Empty aggregate parameters do not take up registers. Examples: 2400 // struct { } a; 2401 // union { } b; 2402 // int c[0]; 2403 // etc. However, we have to provide a place-holder in InVals, so 2404 // pretend we have an 8-byte item at the current address for that 2405 // purpose. 2406 if (!ObjSize) { 2407 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2408 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2409 InVals.push_back(FIN); 2410 continue; 2411 } 2412 2413 unsigned BVAlign = Flags.getByValAlign(); 2414 if (BVAlign > 8) { 2415 ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; 2416 CurArgOffset = ArgOffset; 2417 } 2418 2419 // All aggregates smaller than 8 bytes must be passed right-justified. 2420 if (ObjSize < PtrByteSize) 2421 CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); 2422 // The value of the object is its address. 2423 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2424 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2425 InVals.push_back(FIN); 2426 2427 if (ObjSize < 8) { 2428 if (GPR_idx != Num_GPR_Regs) { 2429 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2430 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2431 SDValue Store; 2432 2433 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 2434 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 2435 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 2436 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2437 MachinePointerInfo(FuncArg), 2438 ObjType, false, false, 0); 2439 } else { 2440 // For sizes that don't fit a truncating store (3, 5, 6, 7), 2441 // store the whole register as-is to the parameter save area 2442 // slot. The address of the parameter was already calculated 2443 // above (InVals.push_back(FIN)) to be the right-justified 2444 // offset within the slot. For this store, we need a new 2445 // frame index that points at the beginning of the slot. 
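          // (Because the 64-bit SVR4 ABI handled here is big-endian, the
          //  aggregate occupies the low-order bytes of the GPR, and a full
          //  doubleword store therefore places those bytes at the high end of
          //  the slot, i.e. the same right-justified address recorded above.)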
2446 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2447 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2448 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2449 MachinePointerInfo(FuncArg), 2450 false, false, 0); 2451 } 2452 2453 MemOps.push_back(Store); 2454 ++GPR_idx; 2455 } 2456 // Whether we copied from a register or not, advance the offset 2457 // into the parameter save area by a full doubleword. 2458 ArgOffset += PtrByteSize; 2459 continue; 2460 } 2461 2462 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2463 // Store whatever pieces of the object are in registers 2464 // to memory. ArgOffset will be the address of the beginning 2465 // of the object. 2466 if (GPR_idx != Num_GPR_Regs) { 2467 unsigned VReg; 2468 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2469 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2470 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2471 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2472 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2473 MachinePointerInfo(FuncArg, j), 2474 false, false, 0); 2475 MemOps.push_back(Store); 2476 ++GPR_idx; 2477 ArgOffset += PtrByteSize; 2478 } else { 2479 ArgOffset += ArgSize - j; 2480 break; 2481 } 2482 } 2483 continue; 2484 } 2485 2486 switch (ObjectVT.getSimpleVT().SimpleTy) { 2487 default: llvm_unreachable("Unhandled argument type!"); 2488 case MVT::i1: 2489 case MVT::i32: 2490 case MVT::i64: 2491 if (GPR_idx != Num_GPR_Regs) { 2492 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2493 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2494 2495 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 2496 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2497 // value to MVT::i64 and then truncate to the correct register size. 2498 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 2499 2500 ++GPR_idx; 2501 } else { 2502 needsLoad = true; 2503 ArgSize = PtrByteSize; 2504 } 2505 ArgOffset += 8; 2506 break; 2507 2508 case MVT::f32: 2509 case MVT::f64: 2510 // Every 8 bytes of argument space consumes one of the GPRs available for 2511 // argument passing. 2512 if (GPR_idx != Num_GPR_Regs) { 2513 ++GPR_idx; 2514 } 2515 if (FPR_idx != Num_FPR_Regs) { 2516 unsigned VReg; 2517 2518 if (ObjectVT == MVT::f32) 2519 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2520 else 2521 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2522 2523 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2524 ++FPR_idx; 2525 } else { 2526 needsLoad = true; 2527 ArgSize = PtrByteSize; 2528 } 2529 2530 ArgOffset += 8; 2531 break; 2532 case MVT::v4f32: 2533 case MVT::v4i32: 2534 case MVT::v8i16: 2535 case MVT::v16i8: 2536 case MVT::v2f64: 2537 // Note that vector arguments in registers don't reserve stack space, 2538 // except in varargs functions. 2539 if (VR_idx != Num_VR_Regs) { 2540 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2541 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2542 if (isVarArg) { 2543 while ((ArgOffset % 16) != 0) { 2544 ArgOffset += PtrByteSize; 2545 if (GPR_idx != Num_GPR_Regs) 2546 GPR_idx++; 2547 } 2548 ArgOffset += 16; 2549 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2550 } 2551 ++VR_idx; 2552 } else { 2553 // Vectors are aligned. 
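        // (Altivec values must be 16-byte aligned, so the slot is first
        //  rounded up to the next 16-byte boundary before reserving 16 bytes.)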
2554 ArgOffset = ((ArgOffset+15)/16)*16; 2555 CurArgOffset = ArgOffset; 2556 ArgOffset += 16; 2557 needsLoad = true; 2558 } 2559 break; 2560 } 2561 2562 // We need to load the argument to a virtual register if we determined 2563 // above that we ran out of physical registers of the appropriate type. 2564 if (needsLoad) { 2565 int FI = MFI->CreateFixedObject(ObjSize, 2566 CurArgOffset + (ArgSize - ObjSize), 2567 isImmutable); 2568 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2569 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2570 false, false, false, 0); 2571 } 2572 2573 InVals.push_back(ArgVal); 2574 } 2575 2576 // Set the size that is at least reserved in caller of this function. Tail 2577 // call optimized functions' reserved stack space needs to be aligned so that 2578 // taking the difference between two stack areas will result in an aligned 2579 // stack. 2580 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true); 2581 2582 // If the function takes variable number of arguments, make a frame index for 2583 // the start of the first vararg value... for expansion of llvm.va_start. 2584 if (isVarArg) { 2585 int Depth = ArgOffset; 2586 2587 FuncInfo->setVarArgsFrameIndex( 2588 MFI->CreateFixedObject(PtrByteSize, Depth, true)); 2589 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2590 2591 // If this function is vararg, store any remaining integer argument regs 2592 // to their spots on the stack so that they may be loaded by deferencing the 2593 // result of va_next. 2594 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2595 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2596 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2597 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2598 MachinePointerInfo(), false, false, 0); 2599 MemOps.push_back(Store); 2600 // Increment the address by four for the next argument to store 2601 SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT); 2602 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2603 } 2604 } 2605 2606 if (!MemOps.empty()) 2607 Chain = DAG.getNode(ISD::TokenFactor, dl, 2608 MVT::Other, &MemOps[0], MemOps.size()); 2609 2610 return Chain; 2611 } 2612 2613 SDValue 2614 PPCTargetLowering::LowerFormalArguments_Darwin( 2615 SDValue Chain, 2616 CallingConv::ID CallConv, bool isVarArg, 2617 const SmallVectorImpl<ISD::InputArg> 2618 &Ins, 2619 SDLoc dl, SelectionDAG &DAG, 2620 SmallVectorImpl<SDValue> &InVals) const { 2621 // TODO: add description of PPC stack frame format, or at least some docs. 2622 // 2623 MachineFunction &MF = DAG.getMachineFunction(); 2624 MachineFrameInfo *MFI = MF.getFrameInfo(); 2625 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2626 2627 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2628 bool isPPC64 = PtrVT == MVT::i64; 2629 // Potential tail calls could cause overwriting of argument stack slots. 2630 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2631 (CallConv == CallingConv::Fast)); 2632 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2633 2634 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 2635 // Area that is at least reserved in caller of this function. 2636 unsigned MinReservedArea = ArgOffset; 2637 2638 static const uint16_t GPR_32[] = { // 32-bit registers. 2639 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2640 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2641 }; 2642 static const uint16_t GPR_64[] = { // 64-bit registers. 
2643 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2644 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2645 }; 2646 2647 static const uint16_t *FPR = GetFPR(); 2648 2649 static const uint16_t VR[] = { 2650 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2651 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2652 }; 2653 2654 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 2655 const unsigned Num_FPR_Regs = 13; 2656 const unsigned Num_VR_Regs = array_lengthof( VR); 2657 2658 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2659 2660 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 2661 2662 // In 32-bit non-varargs functions, the stack space for vectors is after the 2663 // stack space for non-vectors. We do not use this space unless we have 2664 // too many vectors to fit in registers, something that only occurs in 2665 // constructed examples:), but we have to walk the arglist to figure 2666 // that out...for the pathological case, compute VecArgOffset as the 2667 // start of the vector parameter area. Computing VecArgOffset is the 2668 // entire point of the following loop. 2669 unsigned VecArgOffset = ArgOffset; 2670 if (!isVarArg && !isPPC64) { 2671 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 2672 ++ArgNo) { 2673 EVT ObjectVT = Ins[ArgNo].VT; 2674 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2675 2676 if (Flags.isByVal()) { 2677 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 2678 unsigned ObjSize = Flags.getByValSize(); 2679 unsigned ArgSize = 2680 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2681 VecArgOffset += ArgSize; 2682 continue; 2683 } 2684 2685 switch(ObjectVT.getSimpleVT().SimpleTy) { 2686 default: llvm_unreachable("Unhandled argument type!"); 2687 case MVT::i1: 2688 case MVT::i32: 2689 case MVT::f32: 2690 VecArgOffset += 4; 2691 break; 2692 case MVT::i64: // PPC64 2693 case MVT::f64: 2694 // FIXME: We are guaranteed to be !isPPC64 at this point. 2695 // Does MVT::i64 apply? 2696 VecArgOffset += 8; 2697 break; 2698 case MVT::v4f32: 2699 case MVT::v4i32: 2700 case MVT::v8i16: 2701 case MVT::v16i8: 2702 // Nothing to do, we're only looking at Nonvector args here. 2703 break; 2704 } 2705 } 2706 } 2707 // We've found where the vector parameter area in memory is. Skip the 2708 // first 12 parameters; these don't use that memory. 2709 VecArgOffset = ((VecArgOffset+15)/16)*16; 2710 VecArgOffset += 12*16; 2711 2712 // Add DAG nodes to load the arguments or copy them out of registers. On 2713 // entry to a function on PPC, the arguments start after the linkage area, 2714 // although the first ones are often in registers. 2715 2716 SmallVector<SDValue, 8> MemOps; 2717 unsigned nAltivecParamsAtEnd = 0; 2718 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2719 unsigned CurArgIdx = 0; 2720 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2721 SDValue ArgVal; 2722 bool needsLoad = false; 2723 EVT ObjectVT = Ins[ArgNo].VT; 2724 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 2725 unsigned ArgSize = ObjSize; 2726 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2727 std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); 2728 CurArgIdx = Ins[ArgNo].OrigArgIndex; 2729 2730 unsigned CurArgOffset = ArgOffset; 2731 2732 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 
2733 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 2734 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 2735 if (isVarArg || isPPC64) { 2736 MinReservedArea = ((MinReservedArea+15)/16)*16; 2737 MinReservedArea += CalculateStackSlotSize(ObjectVT, 2738 Flags, 2739 PtrByteSize); 2740 } else nAltivecParamsAtEnd++; 2741 } else 2742 // Calculate min reserved area. 2743 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 2744 Flags, 2745 PtrByteSize); 2746 2747 // FIXME the codegen can be much improved in some cases. 2748 // We do not have to keep everything in memory. 2749 if (Flags.isByVal()) { 2750 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2751 ObjSize = Flags.getByValSize(); 2752 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2753 // Objects of size 1 and 2 are right justified, everything else is 2754 // left justified. This means the memory address is adjusted forwards. 2755 if (ObjSize==1 || ObjSize==2) { 2756 CurArgOffset = CurArgOffset + (4 - ObjSize); 2757 } 2758 // The value of the object is its address. 2759 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2760 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2761 InVals.push_back(FIN); 2762 if (ObjSize==1 || ObjSize==2) { 2763 if (GPR_idx != Num_GPR_Regs) { 2764 unsigned VReg; 2765 if (isPPC64) 2766 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2767 else 2768 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2769 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2770 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 2771 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2772 MachinePointerInfo(FuncArg), 2773 ObjType, false, false, 0); 2774 MemOps.push_back(Store); 2775 ++GPR_idx; 2776 } 2777 2778 ArgOffset += PtrByteSize; 2779 2780 continue; 2781 } 2782 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2783 // Store whatever pieces of the object are in registers 2784 // to memory. ArgOffset will be the address of the beginning 2785 // of the object. 2786 if (GPR_idx != Num_GPR_Regs) { 2787 unsigned VReg; 2788 if (isPPC64) 2789 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2790 else 2791 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2792 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2793 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2794 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2795 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2796 MachinePointerInfo(FuncArg, j), 2797 false, false, 0); 2798 MemOps.push_back(Store); 2799 ++GPR_idx; 2800 ArgOffset += PtrByteSize; 2801 } else { 2802 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 2803 break; 2804 } 2805 } 2806 continue; 2807 } 2808 2809 switch (ObjectVT.getSimpleVT().SimpleTy) { 2810 default: llvm_unreachable("Unhandled argument type!"); 2811 case MVT::i1: 2812 case MVT::i32: 2813 if (!isPPC64) { 2814 if (GPR_idx != Num_GPR_Regs) { 2815 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2816 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2817 2818 if (ObjectVT == MVT::i1) 2819 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 2820 2821 ++GPR_idx; 2822 } else { 2823 needsLoad = true; 2824 ArgSize = PtrByteSize; 2825 } 2826 // All int arguments reserve stack space in the Darwin ABI. 
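      // (ArgOffset therefore advances whether or not the value came from a
      //  GPR; only vector arguments, handled below, skip this reservation in
      //  non-varargs functions.)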
2827 ArgOffset += PtrByteSize; 2828 break; 2829 } 2830 // FALLTHROUGH 2831 case MVT::i64: // PPC64 2832 if (GPR_idx != Num_GPR_Regs) { 2833 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2834 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2835 2836 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 2837 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2838 // value to MVT::i64 and then truncate to the correct register size. 2839 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 2840 2841 ++GPR_idx; 2842 } else { 2843 needsLoad = true; 2844 ArgSize = PtrByteSize; 2845 } 2846 // All int arguments reserve stack space in the Darwin ABI. 2847 ArgOffset += 8; 2848 break; 2849 2850 case MVT::f32: 2851 case MVT::f64: 2852 // Every 4 bytes of argument space consumes one of the GPRs available for 2853 // argument passing. 2854 if (GPR_idx != Num_GPR_Regs) { 2855 ++GPR_idx; 2856 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 2857 ++GPR_idx; 2858 } 2859 if (FPR_idx != Num_FPR_Regs) { 2860 unsigned VReg; 2861 2862 if (ObjectVT == MVT::f32) 2863 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2864 else 2865 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2866 2867 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2868 ++FPR_idx; 2869 } else { 2870 needsLoad = true; 2871 } 2872 2873 // All FP arguments reserve stack space in the Darwin ABI. 2874 ArgOffset += isPPC64 ? 8 : ObjSize; 2875 break; 2876 case MVT::v4f32: 2877 case MVT::v4i32: 2878 case MVT::v8i16: 2879 case MVT::v16i8: 2880 // Note that vector arguments in registers don't reserve stack space, 2881 // except in varargs functions. 2882 if (VR_idx != Num_VR_Regs) { 2883 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2884 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2885 if (isVarArg) { 2886 while ((ArgOffset % 16) != 0) { 2887 ArgOffset += PtrByteSize; 2888 if (GPR_idx != Num_GPR_Regs) 2889 GPR_idx++; 2890 } 2891 ArgOffset += 16; 2892 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2893 } 2894 ++VR_idx; 2895 } else { 2896 if (!isVarArg && !isPPC64) { 2897 // Vectors go after all the nonvectors. 2898 CurArgOffset = VecArgOffset; 2899 VecArgOffset += 16; 2900 } else { 2901 // Vectors are aligned. 2902 ArgOffset = ((ArgOffset+15)/16)*16; 2903 CurArgOffset = ArgOffset; 2904 ArgOffset += 16; 2905 } 2906 needsLoad = true; 2907 } 2908 break; 2909 } 2910 2911 // We need to load the argument to a virtual register if we determined above 2912 // that we ran out of physical registers of the appropriate type. 2913 if (needsLoad) { 2914 int FI = MFI->CreateFixedObject(ObjSize, 2915 CurArgOffset + (ArgSize - ObjSize), 2916 isImmutable); 2917 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2918 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2919 false, false, false, 0); 2920 } 2921 2922 InVals.push_back(ArgVal); 2923 } 2924 2925 // Set the size that is at least reserved in caller of this function. Tail 2926 // call optimized functions' reserved stack space needs to be aligned so that 2927 // taking the difference between two stack areas will result in an aligned 2928 // stack. 2929 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64); 2930 2931 // If the function takes variable number of arguments, make a frame index for 2932 // the start of the first vararg value... for expansion of llvm.va_start. 
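  // (Unlike the 32-bit SVR4 lowering above, which builds a four-field
  //  va_list struct, the Darwin and 64-bit paths only need this single frame
  //  index because their va_list is a plain pointer.)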
2933 if (isVarArg) { 2934 int Depth = ArgOffset; 2935 2936 FuncInfo->setVarArgsFrameIndex( 2937 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2938 Depth, true)); 2939 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2940 2941 // If this function is vararg, store any remaining integer argument regs 2942 // to their spots on the stack so that they may be loaded by deferencing the 2943 // result of va_next. 2944 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2945 unsigned VReg; 2946 2947 if (isPPC64) 2948 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2949 else 2950 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2951 2952 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2953 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2954 MachinePointerInfo(), false, false, 0); 2955 MemOps.push_back(Store); 2956 // Increment the address by four for the next argument to store 2957 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2958 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2959 } 2960 } 2961 2962 if (!MemOps.empty()) 2963 Chain = DAG.getNode(ISD::TokenFactor, dl, 2964 MVT::Other, &MemOps[0], MemOps.size()); 2965 2966 return Chain; 2967 } 2968 2969 /// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus 2970 /// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. 2971 static unsigned 2972 CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, 2973 bool isPPC64, 2974 bool isVarArg, 2975 unsigned CC, 2976 const SmallVectorImpl<ISD::OutputArg> 2977 &Outs, 2978 const SmallVectorImpl<SDValue> &OutVals, 2979 unsigned &nAltivecParamsAtEnd) { 2980 // Count how many bytes are to be pushed on the stack, including the linkage 2981 // area, and parameter passing area. We start with 24/48 bytes, which is 2982 // prereserved space for [SP][CR][LR][3 x unused]. 2983 unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); 2984 unsigned NumOps = Outs.size(); 2985 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2986 2987 // Add up all the space actually used. 2988 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 2989 // they all go in registers, but we must reserve stack space for them for 2990 // possible use by the caller. In varargs or 64-bit calls, parameters are 2991 // assigned stack space in order, with padding so Altivec parameters are 2992 // 16-byte aligned. 2993 nAltivecParamsAtEnd = 0; 2994 for (unsigned i = 0; i != NumOps; ++i) { 2995 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2996 EVT ArgVT = Outs[i].VT; 2997 // Varargs Altivec parameters are padded to a 16 byte boundary. 2998 if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || 2999 ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 || 3000 ArgVT==MVT::v2f64) { 3001 if (!isVarArg && !isPPC64) { 3002 // Non-varargs Altivec parameters go after all the non-Altivec 3003 // parameters; handle those later so we know how much padding we need. 3004 nAltivecParamsAtEnd++; 3005 continue; 3006 } 3007 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 3008 NumBytes = ((NumBytes+15)/16)*16; 3009 } 3010 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 3011 } 3012 3013 // Allow for Altivec parameters at the end, if needed. 
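// (Worked example, illustrative only: a 32-bit non-varargs call with three
// i32 arguments and one v4f32 argument starts from the 24-byte linkage area,
// adds 3*4 = 12 bytes for the integer slots (NumBytes = 36) and defers the
// vector; the block below then rounds 36 up to 48 and adds 16 for the
// deferred Altivec parameter, for 64 bytes before the minimum-frame clamp.)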
3014 if (nAltivecParamsAtEnd) { 3015 NumBytes = ((NumBytes+15)/16)*16; 3016 NumBytes += 16*nAltivecParamsAtEnd; 3017 } 3018 3019 // The prolog code of the callee may store up to 8 GPR argument registers to 3020 // the stack, allowing va_start to index over them in memory if its varargs. 3021 // Because we cannot tell if this is needed on the caller side, we have to 3022 // conservatively assume that it is needed. As such, make sure we have at 3023 // least enough stack space for the caller to store the 8 GPRs. 3024 NumBytes = std::max(NumBytes, 3025 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 3026 3027 // Tail call needs the stack to be aligned. 3028 if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ 3029 unsigned TargetAlign = DAG.getMachineFunction().getTarget(). 3030 getFrameLowering()->getStackAlignment(); 3031 unsigned AlignMask = TargetAlign-1; 3032 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 3033 } 3034 3035 return NumBytes; 3036 } 3037 3038 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 3039 /// adjusted to accommodate the arguments for the tailcall. 3040 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 3041 unsigned ParamSize) { 3042 3043 if (!isTailCall) return 0; 3044 3045 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 3046 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 3047 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 3048 // Remember only if the new adjustement is bigger. 3049 if (SPDiff < FI->getTailCallSPDelta()) 3050 FI->setTailCallSPDelta(SPDiff); 3051 3052 return SPDiff; 3053 } 3054 3055 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 3056 /// for tail call optimization. Targets which want to do tail call 3057 /// optimization should implement this function. 3058 bool 3059 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 3060 CallingConv::ID CalleeCC, 3061 bool isVarArg, 3062 const SmallVectorImpl<ISD::InputArg> &Ins, 3063 SelectionDAG& DAG) const { 3064 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 3065 return false; 3066 3067 // Variable argument functions are not supported. 3068 if (isVarArg) 3069 return false; 3070 3071 MachineFunction &MF = DAG.getMachineFunction(); 3072 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 3073 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 3074 // Functions containing by val parameters are not supported. 3075 for (unsigned i = 0; i != Ins.size(); i++) { 3076 ISD::ArgFlagsTy Flags = Ins[i].Flags; 3077 if (Flags.isByVal()) return false; 3078 } 3079 3080 // Non-PIC/GOT tail calls are supported. 3081 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 3082 return true; 3083 3084 // At the moment we can only do local tail calls (in same module, hidden 3085 // or protected) if we are generating PIC. 3086 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3087 return G->getGlobal()->hasHiddenVisibility() 3088 || G->getGlobal()->hasProtectedVisibility(); 3089 } 3090 3091 return false; 3092 } 3093 3094 /// isCallCompatibleAddress - Return the immediate to use if the specified 3095 /// 32-bit value is representable in the immediate field of a BxA instruction. 
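/// (Illustrative: the target must be a word-aligned value that survives
/// sign-extension from 26 bits, so e.g. 0x01FFFFFC is accepted and returned
/// as 0x01FFFFFC >> 2, while 0x1002 fails the alignment test and 0x04000000
/// fails the SignExtend32<26> test.)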
3096 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 3097 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 3098 if (!C) return 0; 3099 3100 int Addr = C->getZExtValue(); 3101 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 3102 SignExtend32<26>(Addr) != Addr) 3103 return 0; // Top 6 bits have to be sext of immediate. 3104 3105 return DAG.getConstant((int)C->getZExtValue() >> 2, 3106 DAG.getTargetLoweringInfo().getPointerTy()).getNode(); 3107 } 3108 3109 namespace { 3110 3111 struct TailCallArgumentInfo { 3112 SDValue Arg; 3113 SDValue FrameIdxOp; 3114 int FrameIdx; 3115 3116 TailCallArgumentInfo() : FrameIdx(0) {} 3117 }; 3118 3119 } 3120 3121 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 3122 static void 3123 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 3124 SDValue Chain, 3125 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 3126 SmallVectorImpl<SDValue> &MemOpChains, 3127 SDLoc dl) { 3128 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 3129 SDValue Arg = TailCallArgs[i].Arg; 3130 SDValue FIN = TailCallArgs[i].FrameIdxOp; 3131 int FI = TailCallArgs[i].FrameIdx; 3132 // Store relative to framepointer. 3133 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, 3134 MachinePointerInfo::getFixedStack(FI), 3135 false, false, 0)); 3136 } 3137 } 3138 3139 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 3140 /// the appropriate stack slot for the tail call optimized function call. 3141 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 3142 MachineFunction &MF, 3143 SDValue Chain, 3144 SDValue OldRetAddr, 3145 SDValue OldFP, 3146 int SPDiff, 3147 bool isPPC64, 3148 bool isDarwinABI, 3149 SDLoc dl) { 3150 if (SPDiff) { 3151 // Calculate the new stack slot for the return address. 3152 int SlotSize = isPPC64 ? 8 : 4; 3153 int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, 3154 isDarwinABI); 3155 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 3156 NewRetAddrLoc, true); 3157 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 3158 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 3159 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 3160 MachinePointerInfo::getFixedStack(NewRetAddr), 3161 false, false, 0); 3162 3163 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 3164 // slot as the FP is never overwritten. 3165 if (isDarwinABI) { 3166 int NewFPLoc = 3167 SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); 3168 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 3169 true); 3170 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 3171 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 3172 MachinePointerInfo::getFixedStack(NewFPIdx), 3173 false, false, 0); 3174 } 3175 } 3176 return Chain; 3177 } 3178 3179 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 3180 /// the position of the argument. 3181 static void 3182 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 3183 SDValue Arg, int SPDiff, unsigned ArgOffset, 3184 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 3185 int Offset = ArgOffset + SPDiff; 3186 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 3187 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 3188 EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; 3189 SDValue FIN = DAG.getFrameIndex(FI, VT); 3190 TailCallArgumentInfo Info; 3191 Info.Arg = Arg; 3192 Info.FrameIdxOp = FIN; 3193 Info.FrameIdx = FI; 3194 TailCallArguments.push_back(Info); 3195 } 3196 3197 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 3198 /// stack slot. Returns the chain as result and the loaded frame pointers in 3199 /// LROpOut/FPOpout. Used when tail calling. 3200 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 3201 int SPDiff, 3202 SDValue Chain, 3203 SDValue &LROpOut, 3204 SDValue &FPOpOut, 3205 bool isDarwinABI, 3206 SDLoc dl) const { 3207 if (SPDiff) { 3208 // Load the LR and FP stack slot for later adjusting. 3209 EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; 3210 LROpOut = getReturnAddrFrameIndex(DAG); 3211 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 3212 false, false, false, 0); 3213 Chain = SDValue(LROpOut.getNode(), 1); 3214 3215 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 3216 // slot as the FP is never overwritten. 3217 if (isDarwinABI) { 3218 FPOpOut = getFramePointerFrameIndex(DAG); 3219 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 3220 false, false, false, 0); 3221 Chain = SDValue(FPOpOut.getNode(), 1); 3222 } 3223 } 3224 return Chain; 3225 } 3226 3227 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 3228 /// by "Src" to address "Dst" of size "Size". Alignment information is 3229 /// specified by the specific parameter attribute. The copy will be passed as 3230 /// a byval function parameter. 3231 /// Sometimes what we are copying is the end of a larger object, the part that 3232 /// does not fit in registers. 3233 static SDValue 3234 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 3235 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 3236 SDLoc dl) { 3237 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 3238 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 3239 false, false, MachinePointerInfo(0), 3240 MachinePointerInfo(0)); 3241 } 3242 3243 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 3244 /// tail calls. 3245 static void 3246 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 3247 SDValue Arg, SDValue PtrOff, int SPDiff, 3248 unsigned ArgOffset, bool isPPC64, bool isTailCall, 3249 bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 3250 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, 3251 SDLoc dl) { 3252 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3253 if (!isTailCall) { 3254 if (isVector) { 3255 SDValue StackPtr; 3256 if (isPPC64) 3257 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3258 else 3259 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3260 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 3261 DAG.getConstant(ArgOffset, PtrVT)); 3262 } 3263 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3264 MachinePointerInfo(), false, false, 0)); 3265 // Calculate and remember argument location. 
3266 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 3267 TailCallArguments); 3268 } 3269 3270 static 3271 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 3272 SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 3273 SDValue LROp, SDValue FPOp, bool isDarwinABI, 3274 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 3275 MachineFunction &MF = DAG.getMachineFunction(); 3276 3277 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 3278 // might overwrite each other in case of tail call optimization. 3279 SmallVector<SDValue, 8> MemOpChains2; 3280 // Do not flag preceding copytoreg stuff together with the following stuff. 3281 InFlag = SDValue(); 3282 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 3283 MemOpChains2, dl); 3284 if (!MemOpChains2.empty()) 3285 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 3286 &MemOpChains2[0], MemOpChains2.size()); 3287 3288 // Store the return address to the appropriate stack slot. 3289 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 3290 isPPC64, isDarwinABI, dl); 3291 3292 // Emit callseq_end just before tailcall node. 3293 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3294 DAG.getIntPtrConstant(0, true), InFlag, dl); 3295 InFlag = Chain.getValue(1); 3296 } 3297 3298 static 3299 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 3300 SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, 3301 SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, 3302 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 3303 const PPCSubtarget &PPCSubTarget) { 3304 3305 bool isPPC64 = PPCSubTarget.isPPC64(); 3306 bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); 3307 3308 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3309 NodeTys.push_back(MVT::Other); // Returns a chain 3310 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 3311 3312 unsigned CallOpc = PPCISD::CALL; 3313 3314 bool needIndirectCall = true; 3315 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 3316 // If this is an absolute destination address, use the munged value. 3317 Callee = SDValue(Dest, 0); 3318 needIndirectCall = false; 3319 } 3320 3321 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3322 // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 3323 // Use indirect calls for ALL functions calls in JIT mode, since the 3324 // far-call stubs may be outside relocation limits for a BL instruction. 3325 if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { 3326 unsigned OpFlags = 0; 3327 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 3328 (PPCSubTarget.getTargetTriple().isMacOSX() && 3329 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 3330 (G->getGlobal()->isDeclaration() || 3331 G->getGlobal()->isWeakForLinker())) { 3332 // PC-relative references to external symbols should go through $stub, 3333 // unless we're building with the leopard linker or later, which 3334 // automatically synthesizes these stubs. 3335 OpFlags = PPCII::MO_DARWIN_STUB; 3336 } 3337 3338 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 3339 // every direct call is) turn it into a TargetGlobalAddress / 3340 // TargetExternalSymbol node so that legalize doesn't hack it. 
3341 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 3342 Callee.getValueType(), 3343 0, OpFlags); 3344 needIndirectCall = false; 3345 } 3346 } 3347 3348 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3349 unsigned char OpFlags = 0; 3350 3351 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 3352 (PPCSubTarget.getTargetTriple().isMacOSX() && 3353 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { 3354 // PC-relative references to external symbols should go through $stub, 3355 // unless we're building with the leopard linker or later, which 3356 // automatically synthesizes these stubs. 3357 OpFlags = PPCII::MO_DARWIN_STUB; 3358 } 3359 3360 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 3361 OpFlags); 3362 needIndirectCall = false; 3363 } 3364 3365 if (needIndirectCall) { 3366 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 3367 // to do the call, we can't use PPCISD::CALL. 3368 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 3369 3370 if (isSVR4ABI && isPPC64) { 3371 // Function pointers in the 64-bit SVR4 ABI do not point to the function 3372 // entry point, but to the function descriptor (the function entry point 3373 // address is part of the function descriptor though). 3374 // The function descriptor is a three doubleword structure with the 3375 // following fields: function entry point, TOC base address and 3376 // environment pointer. 3377 // Thus for a call through a function pointer, the following actions need 3378 // to be performed: 3379 // 1. Save the TOC of the caller in the TOC save area of its stack 3380 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 3381 // 2. Load the address of the function entry point from the function 3382 // descriptor. 3383 // 3. Load the TOC of the callee from the function descriptor into r2. 3384 // 4. Load the environment pointer from the function descriptor into 3385 // r11. 3386 // 5. Branch to the function entry point address. 3387 // 6. On return of the callee, the TOC of the caller needs to be 3388 // restored (this is done in FinishCall()). 3389 // 3390 // All those operations are flagged together to ensure that no other 3391 // operations can be scheduled in between. E.g. without flagging the 3392 // operations together, a TOC access in the caller could be scheduled 3393 // between the load of the callee TOC and the branch to the callee, which 3394 // results in the TOC access going through the TOC of the callee instead 3395 // of going through the TOC of the caller, which leads to incorrect code. 3396 3397 // Load the address of the function entry point from the function 3398 // descriptor. 3399 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); 3400 SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, 3401 InFlag.getNode() ? 3 : 2); 3402 Chain = LoadFuncPtr.getValue(1); 3403 InFlag = LoadFuncPtr.getValue(2); 3404 3405 // Load environment pointer into r11. 3406 // Offset of the environment pointer within the function descriptor. 
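// (Layout sketch of the 64-bit SVR4 function descriptor described above:
//   Callee + 0  : function entry point   (loaded above, later moved to CTR)
//   Callee + 8  : TOC base of the callee (loaded into r2)
//   Callee + 16 : environment pointer    (loaded into r11)
//  which is why the constant 16 is used just below.)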
3407 SDValue PtrOff = DAG.getIntPtrConstant(16); 3408 3409 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 3410 SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, 3411 InFlag); 3412 Chain = LoadEnvPtr.getValue(1); 3413 InFlag = LoadEnvPtr.getValue(2); 3414 3415 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 3416 InFlag); 3417 Chain = EnvVal.getValue(0); 3418 InFlag = EnvVal.getValue(1); 3419 3420 // Load TOC of the callee into r2. We are using a target-specific load 3421 // with r2 hard coded, because the result of a target-independent load 3422 // would never go directly into r2, since r2 is a reserved register (which 3423 // prevents the register allocator from allocating it), resulting in an 3424 // additional register being allocated and an unnecessary move instruction 3425 // being generated. 3426 VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3427 SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, 3428 Callee, InFlag); 3429 Chain = LoadTOCPtr.getValue(0); 3430 InFlag = LoadTOCPtr.getValue(1); 3431 3432 MTCTROps[0] = Chain; 3433 MTCTROps[1] = LoadFuncPtr; 3434 MTCTROps[2] = InFlag; 3435 } 3436 3437 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, 3438 2 + (InFlag.getNode() != 0)); 3439 InFlag = Chain.getValue(1); 3440 3441 NodeTys.clear(); 3442 NodeTys.push_back(MVT::Other); 3443 NodeTys.push_back(MVT::Glue); 3444 Ops.push_back(Chain); 3445 CallOpc = PPCISD::BCTRL; 3446 Callee.setNode(0); 3447 // Add use of X11 (holding environment pointer) 3448 if (isSVR4ABI && isPPC64) 3449 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 3450 // Add CTR register as callee so a bctr can be emitted later. 3451 if (isTailCall) 3452 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 3453 } 3454 3455 // If this is a direct call, pass the chain and the callee. 3456 if (Callee.getNode()) { 3457 Ops.push_back(Chain); 3458 Ops.push_back(Callee); 3459 } 3460 // If this is a tail call add stack pointer delta. 3461 if (isTailCall) 3462 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 3463 3464 // Add argument registers to the end of the list so that they are known live 3465 // into the call. 3466 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 3467 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 3468 RegsToPass[i].second.getValueType())); 3469 3470 return CallOpc; 3471 } 3472 3473 static 3474 bool isLocalCall(const SDValue &Callee) 3475 { 3476 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3477 return !G->getGlobal()->isDeclaration() && 3478 !G->getGlobal()->isWeakForLinker(); 3479 return false; 3480 } 3481 3482 SDValue 3483 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 3484 CallingConv::ID CallConv, bool isVarArg, 3485 const SmallVectorImpl<ISD::InputArg> &Ins, 3486 SDLoc dl, SelectionDAG &DAG, 3487 SmallVectorImpl<SDValue> &InVals) const { 3488 3489 SmallVector<CCValAssign, 16> RVLocs; 3490 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3491 getTargetMachine(), RVLocs, *DAG.getContext()); 3492 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 3493 3494 // Copy all of the result registers out of their specified physreg. 
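// (Illustrative: when the calling convention reports a value promoted to a
// wider LocVT, e.g. an i32 carried in a 64-bit register, the ZExt/SExt cases
// below wrap the copied register in AssertZext/AssertSext so the known state
// of the upper bits is preserved before truncating back to the declared
// ValVT.)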
3495 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 3496 CCValAssign &VA = RVLocs[i]; 3497 assert(VA.isRegLoc() && "Can only return in registers!"); 3498 3499 SDValue Val = DAG.getCopyFromReg(Chain, dl, 3500 VA.getLocReg(), VA.getLocVT(), InFlag); 3501 Chain = Val.getValue(1); 3502 InFlag = Val.getValue(2); 3503 3504 switch (VA.getLocInfo()) { 3505 default: llvm_unreachable("Unknown loc info!"); 3506 case CCValAssign::Full: break; 3507 case CCValAssign::AExt: 3508 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3509 break; 3510 case CCValAssign::ZExt: 3511 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 3512 DAG.getValueType(VA.getValVT())); 3513 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3514 break; 3515 case CCValAssign::SExt: 3516 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 3517 DAG.getValueType(VA.getValVT())); 3518 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3519 break; 3520 } 3521 3522 InVals.push_back(Val); 3523 } 3524 3525 return Chain; 3526 } 3527 3528 SDValue 3529 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, 3530 bool isTailCall, bool isVarArg, 3531 SelectionDAG &DAG, 3532 SmallVector<std::pair<unsigned, SDValue>, 8> 3533 &RegsToPass, 3534 SDValue InFlag, SDValue Chain, 3535 SDValue &Callee, 3536 int SPDiff, unsigned NumBytes, 3537 const SmallVectorImpl<ISD::InputArg> &Ins, 3538 SmallVectorImpl<SDValue> &InVals) const { 3539 std::vector<EVT> NodeTys; 3540 SmallVector<SDValue, 8> Ops; 3541 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, 3542 isTailCall, RegsToPass, Ops, NodeTys, 3543 PPCSubTarget); 3544 3545 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 3546 if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) 3547 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 3548 3549 // When performing tail call optimization the callee pops its arguments off 3550 // the stack. Account for this here so these bytes can be pushed back on in 3551 // PPCFrameLowering::eliminateCallFramePseudoInstr. 3552 int BytesCalleePops = 3553 (CallConv == CallingConv::Fast && 3554 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 3555 3556 // Add a register mask operand representing the call-preserved registers. 3557 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 3558 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 3559 assert(Mask && "Missing call preserved mask for calling convention"); 3560 Ops.push_back(DAG.getRegisterMask(Mask)); 3561 3562 if (InFlag.getNode()) 3563 Ops.push_back(InFlag); 3564 3565 // Emit tail call. 3566 if (isTailCall) { 3567 assert(((Callee.getOpcode() == ISD::Register && 3568 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 3569 Callee.getOpcode() == ISD::TargetExternalSymbol || 3570 Callee.getOpcode() == ISD::TargetGlobalAddress || 3571 isa<ConstantSDNode>(Callee)) && 3572 "Expecting an global address, external symbol, absolute value or register"); 3573 3574 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); 3575 } 3576 3577 // Add a NOP immediately after the branch instruction when using the 64-bit 3578 // SVR4 ABI. At link time, if caller and callee are in a different module and 3579 // thus have a different TOC, the call will be replaced with a call to a stub 3580 // function which saves the current TOC, loads the TOC of the callee and 3581 // branches to the callee. 
The NOP will be replaced with a load instruction 3582 // which restores the TOC of the caller from the TOC save slot of the current 3583 // stack frame. If caller and callee belong to the same module (and have the 3584 // same TOC), the NOP will remain unchanged. 3585 3586 bool needsTOCRestore = false; 3587 if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { 3588 if (CallOpc == PPCISD::BCTRL) { 3589 // This is a call through a function pointer. 3590 // Restore the caller TOC from the save area into R2. 3591 // See PrepareCall() for more information about calls through function 3592 // pointers in the 64-bit SVR4 ABI. 3593 // We are using a target-specific load with r2 hard coded, because the 3594 // result of a target-independent load would never go directly into r2, 3595 // since r2 is a reserved register (which prevents the register allocator 3596 // from allocating it), resulting in an additional register being 3597 // allocated and an unnecessary move instruction being generated. 3598 needsTOCRestore = true; 3599 } else if ((CallOpc == PPCISD::CALL) && 3600 (!isLocalCall(Callee) || 3601 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3602 // Otherwise insert NOP for non-local calls. 3603 CallOpc = PPCISD::CALL_NOP; 3604 } 3605 } 3606 3607 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 3608 InFlag = Chain.getValue(1); 3609 3610 if (needsTOCRestore) { 3611 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3612 Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); 3613 InFlag = Chain.getValue(1); 3614 } 3615 3616 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3617 DAG.getIntPtrConstant(BytesCalleePops, true), 3618 InFlag, dl); 3619 if (!Ins.empty()) 3620 InFlag = Chain.getValue(1); 3621 3622 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 3623 Ins, dl, DAG, InVals); 3624 } 3625 3626 SDValue 3627 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 3628 SmallVectorImpl<SDValue> &InVals) const { 3629 SelectionDAG &DAG = CLI.DAG; 3630 SDLoc &dl = CLI.DL; 3631 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 3632 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 3633 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 3634 SDValue Chain = CLI.Chain; 3635 SDValue Callee = CLI.Callee; 3636 bool &isTailCall = CLI.IsTailCall; 3637 CallingConv::ID CallConv = CLI.CallConv; 3638 bool isVarArg = CLI.IsVarArg; 3639 3640 if (isTailCall) 3641 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 3642 Ins, DAG); 3643 3644 if (PPCSubTarget.isSVR4ABI()) { 3645 if (PPCSubTarget.isPPC64()) 3646 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 3647 isTailCall, Outs, OutVals, Ins, 3648 dl, DAG, InVals); 3649 else 3650 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 3651 isTailCall, Outs, OutVals, Ins, 3652 dl, DAG, InVals); 3653 } 3654 3655 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 3656 isTailCall, Outs, OutVals, Ins, 3657 dl, DAG, InVals); 3658 } 3659 3660 SDValue 3661 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, 3662 CallingConv::ID CallConv, bool isVarArg, 3663 bool isTailCall, 3664 const SmallVectorImpl<ISD::OutputArg> &Outs, 3665 const SmallVectorImpl<SDValue> &OutVals, 3666 const SmallVectorImpl<ISD::InputArg> &Ins, 3667 SDLoc dl, SelectionDAG &DAG, 3668 SmallVectorImpl<SDValue> &InVals) const { 3669 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 3670 // of the 32-bit SVR4 ABI stack frame 
layout. 3671 3672 assert((CallConv == CallingConv::C || 3673 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 3674 3675 unsigned PtrByteSize = 4; 3676 3677 MachineFunction &MF = DAG.getMachineFunction(); 3678 3679 // Mark this function as potentially containing a function that contains a 3680 // tail call. As a consequence the frame pointer will be used for dynamicalloc 3681 // and restoring the callers stack pointer in this functions epilog. This is 3682 // done because by tail calling the called function might overwrite the value 3683 // in this function's (MF) stack pointer stack slot 0(SP). 3684 if (getTargetMachine().Options.GuaranteedTailCallOpt && 3685 CallConv == CallingConv::Fast) 3686 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 3687 3688 // Count how many bytes are to be pushed on the stack, including the linkage 3689 // area, parameter list area and the part of the local variable space which 3690 // contains copies of aggregates which are passed by value. 3691 3692 // Assign locations to all of the outgoing arguments. 3693 SmallVector<CCValAssign, 16> ArgLocs; 3694 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3695 getTargetMachine(), ArgLocs, *DAG.getContext()); 3696 3697 // Reserve space for the linkage area on the stack. 3698 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); 3699 3700 if (isVarArg) { 3701 // Handle fixed and variable vector arguments differently. 3702 // Fixed vector arguments go into registers as long as registers are 3703 // available. Variable vector arguments always go into memory. 3704 unsigned NumArgs = Outs.size(); 3705 3706 for (unsigned i = 0; i != NumArgs; ++i) { 3707 MVT ArgVT = Outs[i].VT; 3708 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3709 bool Result; 3710 3711 if (Outs[i].IsFixed) { 3712 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 3713 CCInfo); 3714 } else { 3715 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 3716 ArgFlags, CCInfo); 3717 } 3718 3719 if (Result) { 3720 #ifndef NDEBUG 3721 errs() << "Call operand #" << i << " has unhandled type " 3722 << EVT(ArgVT).getEVTString() << "\n"; 3723 #endif 3724 llvm_unreachable(0); 3725 } 3726 } 3727 } else { 3728 // All arguments are treated the same. 3729 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 3730 } 3731 3732 // Assign locations to all of the outgoing aggregate by value arguments. 3733 SmallVector<CCValAssign, 16> ByValArgLocs; 3734 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3735 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 3736 3737 // Reserve stack space for the allocations in CCInfo. 3738 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 3739 3740 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 3741 3742 // Size of the linkage area, parameter list area and the part of the local 3743 // space variable where copies of aggregates which are passed by value are 3744 // stored. 3745 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 3746 3747 // Calculate by how many bytes the stack has to be adjusted in case of tail 3748 // call optimization. 3749 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 3750 3751 // Adjust the stack pointer for the new arguments... 
3752 // These operations are automatically eliminated by the prolog/epilog pass 3753 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 3754 dl); 3755 SDValue CallSeqStart = Chain; 3756 3757 // Load the return address and frame pointer so it can be moved somewhere else 3758 // later. 3759 SDValue LROp, FPOp; 3760 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, 3761 dl); 3762 3763 // Set up a copy of the stack pointer for use loading and storing any 3764 // arguments that may not fit in the registers available for argument 3765 // passing. 3766 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3767 3768 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 3769 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 3770 SmallVector<SDValue, 8> MemOpChains; 3771 3772 bool seenFloatArg = false; 3773 // Walk the register/memloc assignments, inserting copies/loads. 3774 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 3775 i != e; 3776 ++i) { 3777 CCValAssign &VA = ArgLocs[i]; 3778 SDValue Arg = OutVals[i]; 3779 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3780 3781 if (Flags.isByVal()) { 3782 // Argument is an aggregate which is passed by value, thus we need to 3783 // create a copy of it in the local variable space of the current stack 3784 // frame (which is the stack frame of the caller) and pass the address of 3785 // this copy to the callee. 3786 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 3787 CCValAssign &ByValVA = ByValArgLocs[j++]; 3788 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 3789 3790 // Memory reserved in the local variable space of the callers stack frame. 3791 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 3792 3793 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 3794 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 3795 3796 // Create a copy of the argument in the local area of the current 3797 // stack frame. 3798 SDValue MemcpyCall = 3799 CreateCopyOfByValArgument(Arg, PtrOff, 3800 CallSeqStart.getNode()->getOperand(0), 3801 Flags, DAG, dl); 3802 3803 // This must go outside the CALLSEQ_START..END. 3804 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 3805 CallSeqStart.getNode()->getOperand(1), 3806 SDLoc(MemcpyCall)); 3807 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 3808 NewCallSeqStart.getNode()); 3809 Chain = CallSeqStart = NewCallSeqStart; 3810 3811 // Pass the address of the aggregate copy on the stack either in a 3812 // physical register or in the parameter list area of the current stack 3813 // frame to the callee. 3814 Arg = PtrOff; 3815 } 3816 3817 if (VA.isRegLoc()) { 3818 if (Arg.getValueType() == MVT::i1) 3819 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 3820 3821 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 3822 // Put argument in a physical register. 3823 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 3824 } else { 3825 // Put argument in the parameter list area of the current stack frame. 3826 assert(VA.isMemLoc()); 3827 unsigned LocMemOffset = VA.getLocMemOffset(); 3828 3829 if (!isTailCall) { 3830 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 3831 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 3832 3833 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3834 MachinePointerInfo(), 3835 false, false, 0)); 3836 } else { 3837 // Calculate and remember argument location. 
3838 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 3839 TailCallArguments); 3840 } 3841 } 3842 } 3843 3844 if (!MemOpChains.empty()) 3845 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 3846 &MemOpChains[0], MemOpChains.size()); 3847 3848 // Build a sequence of copy-to-reg nodes chained together with token chain 3849 // and flag operands which copy the outgoing args into the appropriate regs. 3850 SDValue InFlag; 3851 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 3852 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 3853 RegsToPass[i].second, InFlag); 3854 InFlag = Chain.getValue(1); 3855 } 3856 3857 // Set CR bit 6 to true if this is a vararg call with floating args passed in 3858 // registers. 3859 if (isVarArg) { 3860 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3861 SDValue Ops[] = { Chain, InFlag }; 3862 3863 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 3864 dl, VTs, Ops, InFlag.getNode() ? 2 : 1); 3865 3866 InFlag = Chain.getValue(1); 3867 } 3868 3869 if (isTailCall) 3870 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, 3871 false, TailCallArguments); 3872 3873 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 3874 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 3875 Ins, InVals); 3876 } 3877 3878 // Copy an argument into memory, being careful to do this outside the 3879 // call sequence for the call to which the argument belongs. 3880 SDValue 3881 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, 3882 SDValue CallSeqStart, 3883 ISD::ArgFlagsTy Flags, 3884 SelectionDAG &DAG, 3885 SDLoc dl) const { 3886 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 3887 CallSeqStart.getNode()->getOperand(0), 3888 Flags, DAG, dl); 3889 // The MEMCPY must go outside the CALLSEQ_START..END. 3890 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 3891 CallSeqStart.getNode()->getOperand(1), 3892 SDLoc(MemcpyCall)); 3893 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 3894 NewCallSeqStart.getNode()); 3895 return NewCallSeqStart; 3896 } 3897 3898 SDValue 3899 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, 3900 CallingConv::ID CallConv, bool isVarArg, 3901 bool isTailCall, 3902 const SmallVectorImpl<ISD::OutputArg> &Outs, 3903 const SmallVectorImpl<SDValue> &OutVals, 3904 const SmallVectorImpl<ISD::InputArg> &Ins, 3905 SDLoc dl, SelectionDAG &DAG, 3906 SmallVectorImpl<SDValue> &InVals) const { 3907 3908 unsigned NumOps = Outs.size(); 3909 3910 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3911 unsigned PtrByteSize = 8; 3912 3913 MachineFunction &MF = DAG.getMachineFunction(); 3914 3915 // Mark this function as potentially containing a function that contains a 3916 // tail call. As a consequence the frame pointer will be used for dynamicalloc 3917 // and restoring the callers stack pointer in this functions epilog. This is 3918 // done because by tail calling the called function might overwrite the value 3919 // in this function's (MF) stack pointer stack slot 0(SP). 3920 if (getTargetMachine().Options.GuaranteedTailCallOpt && 3921 CallConv == CallingConv::Fast) 3922 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 3923 3924 unsigned nAltivecParamsAtEnd = 0; 3925 3926 // Count how many bytes are to be pushed on the stack, including the linkage 3927 // area, and parameter passing area. We start with at least 48 bytes, which 3928 // is reserved space for [SP][CR][LR][3 x unused]. 
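// (Illustrative layout of those 48 bytes in the 64-bit SVR4 ABI: 0 back
// chain, 8 CR save, 16 LR save, 24 and 32 reserved, 40 TOC save; the slot at
// offset 40 is the one the indirect-call path later in this function stores
// r2 into.)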
3929 // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result 3930 // of this call. 3931 unsigned NumBytes = 3932 CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv, 3933 Outs, OutVals, nAltivecParamsAtEnd); 3934 3935 // Calculate by how many bytes the stack has to be adjusted in case of tail 3936 // call optimization. 3937 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 3938 3939 // To protect arguments on the stack from being clobbered in a tail call, 3940 // force all the loads to happen before doing any other lowering. 3941 if (isTailCall) 3942 Chain = DAG.getStackArgumentTokenFactor(Chain); 3943 3944 // Adjust the stack pointer for the new arguments... 3945 // These operations are automatically eliminated by the prolog/epilog pass 3946 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 3947 dl); 3948 SDValue CallSeqStart = Chain; 3949 3950 // Load the return address and frame pointer so it can be move somewhere else 3951 // later. 3952 SDValue LROp, FPOp; 3953 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 3954 dl); 3955 3956 // Set up a copy of the stack pointer for use loading and storing any 3957 // arguments that may not fit in the registers available for argument 3958 // passing. 3959 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3960 3961 // Figure out which arguments are going to go in registers, and which in 3962 // memory. Also, if this is a vararg function, floating point operations 3963 // must be stored to our stack, and loaded into integer regs as well, if 3964 // any integer regs are available for argument passing. 3965 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); 3966 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3967 3968 static const uint16_t GPR[] = { 3969 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3970 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3971 }; 3972 static const uint16_t *FPR = GetFPR(); 3973 3974 static const uint16_t VR[] = { 3975 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3976 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3977 }; 3978 const unsigned NumGPRs = array_lengthof(GPR); 3979 const unsigned NumFPRs = 13; 3980 const unsigned NumVRs = array_lengthof(VR); 3981 3982 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 3983 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 3984 3985 SmallVector<SDValue, 8> MemOpChains; 3986 for (unsigned i = 0; i != NumOps; ++i) { 3987 SDValue Arg = OutVals[i]; 3988 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3989 3990 // PtrOff will be used to store the current argument to the stack if a 3991 // register cannot be found for it. 3992 SDValue PtrOff; 3993 3994 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 3995 3996 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 3997 3998 // Promote integers to 64-bit values. 3999 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 4000 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4001 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4002 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4003 } 4004 4005 // FIXME memcpy is used way more than necessary. Correctness first. 4006 // Note: "by value" is code for passing a structure by value, not 4007 // basic types. 4008 if (Flags.isByVal()) { 4009 // Note: Size includes alignment padding, so 4010 // struct x { short a; char b; } 4011 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 
4012 // These are the proper values we need for right-justifying the 4013 // aggregate in a parameter register. 4014 unsigned Size = Flags.getByValSize(); 4015 4016 // An empty aggregate parameter takes up no storage and no 4017 // registers. 4018 if (Size == 0) 4019 continue; 4020 4021 unsigned BVAlign = Flags.getByValAlign(); 4022 if (BVAlign > 8) { 4023 if (BVAlign % PtrByteSize != 0) 4024 llvm_unreachable( 4025 "ByVal alignment is not a multiple of the pointer size"); 4026 4027 ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; 4028 } 4029 4030 // All aggregates smaller than 8 bytes must be passed right-justified. 4031 if (Size==1 || Size==2 || Size==4) { 4032 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); 4033 if (GPR_idx != NumGPRs) { 4034 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4035 MachinePointerInfo(), VT, 4036 false, false, 0); 4037 MemOpChains.push_back(Load.getValue(1)); 4038 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4039 4040 ArgOffset += PtrByteSize; 4041 continue; 4042 } 4043 } 4044 4045 if (GPR_idx == NumGPRs && Size < 8) { 4046 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4047 PtrOff.getValueType()); 4048 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4049 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4050 CallSeqStart, 4051 Flags, DAG, dl); 4052 ArgOffset += PtrByteSize; 4053 continue; 4054 } 4055 // Copy entire object into memory. There are cases where gcc-generated 4056 // code assumes it is there, even if it could be put entirely into 4057 // registers. (This is not what the doc says.) 4058 4059 // FIXME: The above statement is likely due to a misunderstanding of the 4060 // documents. All arguments must be copied into the parameter area BY 4061 // THE CALLEE in the event that the callee takes the address of any 4062 // formal argument. That has not yet been implemented. However, it is 4063 // reasonable to use the stack area as a staging area for the register 4064 // load. 4065 4066 // Skip this for small aggregates, as we will use the same slot for a 4067 // right-justified copy, below. 4068 if (Size >= 8) 4069 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4070 CallSeqStart, 4071 Flags, DAG, dl); 4072 4073 // When a register is available, pass a small aggregate right-justified. 4074 if (Size < 8 && GPR_idx != NumGPRs) { 4075 // The easiest way to get this right-justified in a register 4076 // is to copy the structure into the rightmost portion of a 4077 // local variable slot, then load the whole slot into the 4078 // register. 4079 // FIXME: The memcpy seems to produce pretty awful code for 4080 // small aggregates, particularly for packed ones. 4081 // FIXME: It would be preferable to use the slot in the 4082 // parameter save area instead of a new local variable. 4083 SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); 4084 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4085 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4086 CallSeqStart, 4087 Flags, DAG, dl); 4088 4089 // Load the slot into the register. 4090 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, 4091 MachinePointerInfo(), 4092 false, false, false, 0); 4093 MemOpChains.push_back(Load.getValue(1)); 4094 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4095 4096 // Done with this argument. 
4097 ArgOffset += PtrByteSize; 4098 continue; 4099 } 4100 4101 // For aggregates larger than PtrByteSize, copy the pieces of the 4102 // object that fit into registers from the parameter save area. 4103 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4104 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4105 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4106 if (GPR_idx != NumGPRs) { 4107 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4108 MachinePointerInfo(), 4109 false, false, false, 0); 4110 MemOpChains.push_back(Load.getValue(1)); 4111 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4112 ArgOffset += PtrByteSize; 4113 } else { 4114 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4115 break; 4116 } 4117 } 4118 continue; 4119 } 4120 4121 switch (Arg.getSimpleValueType().SimpleTy) { 4122 default: llvm_unreachable("Unexpected ValueType for argument!"); 4123 case MVT::i1: 4124 case MVT::i32: 4125 case MVT::i64: 4126 if (GPR_idx != NumGPRs) { 4127 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4128 } else { 4129 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4130 true, isTailCall, false, MemOpChains, 4131 TailCallArguments, dl); 4132 } 4133 ArgOffset += PtrByteSize; 4134 break; 4135 case MVT::f32: 4136 case MVT::f64: 4137 if (FPR_idx != NumFPRs) { 4138 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4139 4140 if (isVarArg) { 4141 // A single float or an aggregate containing only a single float 4142 // must be passed right-justified in the stack doubleword, and 4143 // in the GPR, if one is available. 4144 SDValue StoreOff; 4145 if (Arg.getSimpleValueType().SimpleTy == MVT::f32) { 4146 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4147 StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4148 } else 4149 StoreOff = PtrOff; 4150 4151 SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff, 4152 MachinePointerInfo(), false, false, 0); 4153 MemOpChains.push_back(Store); 4154 4155 // Float varargs are always shadowed in available integer registers 4156 if (GPR_idx != NumGPRs) { 4157 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4158 MachinePointerInfo(), false, false, 4159 false, 0); 4160 MemOpChains.push_back(Load.getValue(1)); 4161 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4162 } 4163 } else if (GPR_idx != NumGPRs) 4164 // If we have any FPRs remaining, we may also have GPRs remaining. 4165 ++GPR_idx; 4166 } else { 4167 // Single-precision floating-point values are mapped to the 4168 // second (rightmost) word of the stack doubleword. 4169 if (Arg.getValueType() == MVT::f32) { 4170 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4171 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4172 } 4173 4174 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4175 true, isTailCall, false, MemOpChains, 4176 TailCallArguments, dl); 4177 } 4178 ArgOffset += 8; 4179 break; 4180 case MVT::v4f32: 4181 case MVT::v4i32: 4182 case MVT::v8i16: 4183 case MVT::v16i8: 4184 case MVT::v2f64: 4185 if (isVarArg) { 4186 // These go aligned on the stack, or in the corresponding R registers 4187 // when within range. The Darwin PPC ABI doc claims they also go in 4188 // V registers; in fact gcc does this only for arguments that are 4189 // prototyped, not for those that match the ... We do it for all 4190 // arguments, seems to work. 
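// (Illustrative: with PtrByteSize == 8, an ArgOffset of 56 makes the loop
// below skip one 8-byte slot and one shadow GPR so the vector is stored
// 16-byte aligned at offset 64; the 16 bytes are then reloaded into a VR if
// one is free and, piecewise, into up to two remaining GPRs.)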
4191 while (ArgOffset % 16 !=0) { 4192 ArgOffset += PtrByteSize; 4193 if (GPR_idx != NumGPRs) 4194 GPR_idx++; 4195 } 4196 // We could elide this store in the case where the object fits 4197 // entirely in R registers. Maybe later. 4198 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4199 DAG.getConstant(ArgOffset, PtrVT)); 4200 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4201 MachinePointerInfo(), false, false, 0); 4202 MemOpChains.push_back(Store); 4203 if (VR_idx != NumVRs) { 4204 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4205 MachinePointerInfo(), 4206 false, false, false, 0); 4207 MemOpChains.push_back(Load.getValue(1)); 4208 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 4209 } 4210 ArgOffset += 16; 4211 for (unsigned i=0; i<16; i+=PtrByteSize) { 4212 if (GPR_idx == NumGPRs) 4213 break; 4214 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4215 DAG.getConstant(i, PtrVT)); 4216 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4217 false, false, false, 0); 4218 MemOpChains.push_back(Load.getValue(1)); 4219 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4220 } 4221 break; 4222 } 4223 4224 // Non-varargs Altivec params generally go in registers, but have 4225 // stack space allocated at the end. 4226 if (VR_idx != NumVRs) { 4227 // Doesn't have GPR space allocated. 4228 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 4229 } else { 4230 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4231 true, isTailCall, true, MemOpChains, 4232 TailCallArguments, dl); 4233 ArgOffset += 16; 4234 } 4235 break; 4236 } 4237 } 4238 4239 if (!MemOpChains.empty()) 4240 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 4241 &MemOpChains[0], MemOpChains.size()); 4242 4243 // Check if this is an indirect call (MTCTR/BCTRL). 4244 // See PrepareCall() for more information about calls through function 4245 // pointers in the 64-bit SVR4 ABI. 4246 if (!isTailCall && 4247 !dyn_cast<GlobalAddressSDNode>(Callee) && 4248 !dyn_cast<ExternalSymbolSDNode>(Callee) && 4249 !isBLACompatibleAddress(Callee, DAG)) { 4250 // Load r2 into a virtual register and store it to the TOC save area. 4251 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 4252 // TOC save area offset. 4253 SDValue PtrOff = DAG.getIntPtrConstant(40); 4254 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4255 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), 4256 false, false, 0); 4257 // R12 must contain the address of an indirect callee. This does not 4258 // mean the MTCTR instruction must use R12; it's easier to model this 4259 // as an extra parameter, so do that. 4260 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 4261 } 4262 4263 // Build a sequence of copy-to-reg nodes chained together with token chain 4264 // and flag operands which copy the outgoing args into the appropriate regs. 
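// (Illustrative: each CopyToReg below consumes the glue produced by the
// previous one, and the final glue value feeds the call node, so the
// scheduler cannot interleave unrelated nodes between the last argument copy
// and the call itself.)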
4265 SDValue InFlag; 4266 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4267 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4268 RegsToPass[i].second, InFlag); 4269 InFlag = Chain.getValue(1); 4270 } 4271 4272 if (isTailCall) 4273 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, 4274 FPOp, true, TailCallArguments); 4275 4276 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 4277 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4278 Ins, InVals); 4279 } 4280 4281 SDValue 4282 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, 4283 CallingConv::ID CallConv, bool isVarArg, 4284 bool isTailCall, 4285 const SmallVectorImpl<ISD::OutputArg> &Outs, 4286 const SmallVectorImpl<SDValue> &OutVals, 4287 const SmallVectorImpl<ISD::InputArg> &Ins, 4288 SDLoc dl, SelectionDAG &DAG, 4289 SmallVectorImpl<SDValue> &InVals) const { 4290 4291 unsigned NumOps = Outs.size(); 4292 4293 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4294 bool isPPC64 = PtrVT == MVT::i64; 4295 unsigned PtrByteSize = isPPC64 ? 8 : 4; 4296 4297 MachineFunction &MF = DAG.getMachineFunction(); 4298 4299 // Mark this function as potentially containing a function that contains a 4300 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4301 // and restoring the callers stack pointer in this functions epilog. This is 4302 // done because by tail calling the called function might overwrite the value 4303 // in this function's (MF) stack pointer stack slot 0(SP). 4304 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4305 CallConv == CallingConv::Fast) 4306 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4307 4308 unsigned nAltivecParamsAtEnd = 0; 4309 4310 // Count how many bytes are to be pushed on the stack, including the linkage 4311 // area, and parameter passing area. We start with 24/48 bytes, which is 4312 // prereserved space for [SP][CR][LR][3 x unused]. 4313 unsigned NumBytes = 4314 CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, 4315 Outs, OutVals, 4316 nAltivecParamsAtEnd); 4317 4318 // Calculate by how many bytes the stack has to be adjusted in case of tail 4319 // call optimization. 4320 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4321 4322 // To protect arguments on the stack from being clobbered in a tail call, 4323 // force all the loads to happen before doing any other lowering. 4324 if (isTailCall) 4325 Chain = DAG.getStackArgumentTokenFactor(Chain); 4326 4327 // Adjust the stack pointer for the new arguments... 4328 // These operations are automatically eliminated by the prolog/epilog pass 4329 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4330 dl); 4331 SDValue CallSeqStart = Chain; 4332 4333 // Load the return address and frame pointer so it can be move somewhere else 4334 // later. 4335 SDValue LROp, FPOp; 4336 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4337 dl); 4338 4339 // Set up a copy of the stack pointer for use loading and storing any 4340 // arguments that may not fit in the registers available for argument 4341 // passing. 4342 SDValue StackPtr; 4343 if (isPPC64) 4344 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4345 else 4346 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4347 4348 // Figure out which arguments are going to go in registers, and which in 4349 // memory. 
Also, if this is a vararg function, floating point operations 4350 // must be stored to our stack, and loaded into integer regs as well, if 4351 // any integer regs are available for argument passing. 4352 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 4353 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4354 4355 static const uint16_t GPR_32[] = { // 32-bit registers. 4356 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 4357 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 4358 }; 4359 static const uint16_t GPR_64[] = { // 64-bit registers. 4360 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4361 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4362 }; 4363 static const uint16_t *FPR = GetFPR(); 4364 4365 static const uint16_t VR[] = { 4366 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4367 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4368 }; 4369 const unsigned NumGPRs = array_lengthof(GPR_32); 4370 const unsigned NumFPRs = 13; 4371 const unsigned NumVRs = array_lengthof(VR); 4372 4373 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 4374 4375 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4376 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4377 4378 SmallVector<SDValue, 8> MemOpChains; 4379 for (unsigned i = 0; i != NumOps; ++i) { 4380 SDValue Arg = OutVals[i]; 4381 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4382 4383 // PtrOff will be used to store the current argument to the stack if a 4384 // register cannot be found for it. 4385 SDValue PtrOff; 4386 4387 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4388 4389 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4390 4391 // On PPC64, promote integers to 64-bit values. 4392 if (isPPC64 && Arg.getValueType() == MVT::i32) { 4393 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4394 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4395 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4396 } 4397 4398 // FIXME memcpy is used way more than necessary. Correctness first. 4399 // Note: "by value" is code for passing a structure by value, not 4400 // basic types. 4401 if (Flags.isByVal()) { 4402 unsigned Size = Flags.getByValSize(); 4403 // Very small objects are passed right-justified. Everything else is 4404 // passed left-justified. 4405 if (Size==1 || Size==2) { 4406 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 4407 if (GPR_idx != NumGPRs) { 4408 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4409 MachinePointerInfo(), VT, 4410 false, false, 0); 4411 MemOpChains.push_back(Load.getValue(1)); 4412 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4413 4414 ArgOffset += PtrByteSize; 4415 } else { 4416 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4417 PtrOff.getValueType()); 4418 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4419 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4420 CallSeqStart, 4421 Flags, DAG, dl); 4422 ArgOffset += PtrByteSize; 4423 } 4424 continue; 4425 } 4426 // Copy entire object into memory. There are cases where gcc-generated 4427 // code assumes it is there, even if it could be put entirely into 4428 // registers. (This is not what the doc says.) 4429 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4430 CallSeqStart, 4431 Flags, DAG, dl); 4432 4433 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 4434 // copy the pieces of the object that fit into registers from the 4435 // parameter save area. 
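// Worked example (sizes are illustrative): for a 24-byte by-value struct
// with PtrByteSize == 8, the memcpy above has already placed all 24 bytes
// in the parameter save area; the loop below then reloads the three 8-byte
// pieces into consecutive GPRs while registers remain, advancing ArgOffset
// by 8 per piece. If the GPRs run out part way through, ArgOffset is simply
// advanced past the rest of the object in one step.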
4436 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4437 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4438 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4439 if (GPR_idx != NumGPRs) { 4440 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4441 MachinePointerInfo(), 4442 false, false, false, 0); 4443 MemOpChains.push_back(Load.getValue(1)); 4444 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4445 ArgOffset += PtrByteSize; 4446 } else { 4447 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4448 break; 4449 } 4450 } 4451 continue; 4452 } 4453 4454 switch (Arg.getSimpleValueType().SimpleTy) { 4455 default: llvm_unreachable("Unexpected ValueType for argument!"); 4456 case MVT::i1: 4457 case MVT::i32: 4458 case MVT::i64: 4459 if (GPR_idx != NumGPRs) { 4460 if (Arg.getValueType() == MVT::i1) 4461 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 4462 4463 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4464 } else { 4465 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4466 isPPC64, isTailCall, false, MemOpChains, 4467 TailCallArguments, dl); 4468 } 4469 ArgOffset += PtrByteSize; 4470 break; 4471 case MVT::f32: 4472 case MVT::f64: 4473 if (FPR_idx != NumFPRs) { 4474 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4475 4476 if (isVarArg) { 4477 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4478 MachinePointerInfo(), false, false, 0); 4479 MemOpChains.push_back(Store); 4480 4481 // Float varargs are always shadowed in available integer registers 4482 if (GPR_idx != NumGPRs) { 4483 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4484 MachinePointerInfo(), false, false, 4485 false, 0); 4486 MemOpChains.push_back(Load.getValue(1)); 4487 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4488 } 4489 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 4490 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4491 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4492 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4493 MachinePointerInfo(), 4494 false, false, false, 0); 4495 MemOpChains.push_back(Load.getValue(1)); 4496 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4497 } 4498 } else { 4499 // If we have any FPRs remaining, we may also have GPRs remaining. 4500 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 4501 // GPRs. 4502 if (GPR_idx != NumGPRs) 4503 ++GPR_idx; 4504 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 4505 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 4506 ++GPR_idx; 4507 } 4508 } else 4509 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4510 isPPC64, isTailCall, false, MemOpChains, 4511 TailCallArguments, dl); 4512 if (isPPC64) 4513 ArgOffset += 8; 4514 else 4515 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 4516 break; 4517 case MVT::v4f32: 4518 case MVT::v4i32: 4519 case MVT::v8i16: 4520 case MVT::v16i8: 4521 if (isVarArg) { 4522 // These go aligned on the stack, or in the corresponding R registers 4523 // when within range. The Darwin PPC ABI doc claims they also go in 4524 // V registers; in fact gcc does this only for arguments that are 4525 // prototyped, not for those that match the ... We do it for all 4526 // arguments, seems to work. 
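// Illustrative walk-through (the offset is hypothetical): if ArgOffset is 56
// when a v4i32 vararg is processed, the loop below first pads it to 64,
// consuming one GPR slot per PtrByteSize of padding; the vector is then
// stored at that 16-byte aligned offset, optionally reloaded into the next
// free VR, and finally reloaded piecewise into up to 16/PtrByteSize GPRs so
// a va_arg consumer can pick it up from either place.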
4527 while (ArgOffset % 16 !=0) { 4528 ArgOffset += PtrByteSize; 4529 if (GPR_idx != NumGPRs) 4530 GPR_idx++; 4531 } 4532 // We could elide this store in the case where the object fits 4533 // entirely in R registers. Maybe later. 4534 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4535 DAG.getConstant(ArgOffset, PtrVT)); 4536 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4537 MachinePointerInfo(), false, false, 0); 4538 MemOpChains.push_back(Store); 4539 if (VR_idx != NumVRs) { 4540 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4541 MachinePointerInfo(), 4542 false, false, false, 0); 4543 MemOpChains.push_back(Load.getValue(1)); 4544 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 4545 } 4546 ArgOffset += 16; 4547 for (unsigned i=0; i<16; i+=PtrByteSize) { 4548 if (GPR_idx == NumGPRs) 4549 break; 4550 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4551 DAG.getConstant(i, PtrVT)); 4552 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4553 false, false, false, 0); 4554 MemOpChains.push_back(Load.getValue(1)); 4555 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4556 } 4557 break; 4558 } 4559 4560 // Non-varargs Altivec params generally go in registers, but have 4561 // stack space allocated at the end. 4562 if (VR_idx != NumVRs) { 4563 // Doesn't have GPR space allocated. 4564 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 4565 } else if (nAltivecParamsAtEnd==0) { 4566 // We are emitting Altivec params in order. 4567 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4568 isPPC64, isTailCall, true, MemOpChains, 4569 TailCallArguments, dl); 4570 ArgOffset += 16; 4571 } 4572 break; 4573 } 4574 } 4575 // If all Altivec parameters fit in registers, as they usually do, 4576 // they get stack space following the non-Altivec parameters. We 4577 // don't track this here because nobody below needs it. 4578 // If there are more Altivec parameters than fit in registers emit 4579 // the stores here. 4580 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 4581 unsigned j = 0; 4582 // Offset is aligned; skip 1st 12 params which go in V registers. 4583 ArgOffset = ((ArgOffset+15)/16)*16; 4584 ArgOffset += 12*16; 4585 for (unsigned i = 0; i != NumOps; ++i) { 4586 SDValue Arg = OutVals[i]; 4587 EVT ArgType = Outs[i].VT; 4588 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 4589 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 4590 if (++j > NumVRs) { 4591 SDValue PtrOff; 4592 // We are emitting Altivec params in order. 4593 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4594 isPPC64, isTailCall, true, MemOpChains, 4595 TailCallArguments, dl); 4596 ArgOffset += 16; 4597 } 4598 } 4599 } 4600 } 4601 4602 if (!MemOpChains.empty()) 4603 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 4604 &MemOpChains[0], MemOpChains.size()); 4605 4606 // On Darwin, R12 must contain the address of an indirect callee. This does 4607 // not mean the MTCTR instruction must use R12; it's easier to model this as 4608 // an extra parameter, so do that. 4609 if (!isTailCall && 4610 !dyn_cast<GlobalAddressSDNode>(Callee) && 4611 !dyn_cast<ExternalSymbolSDNode>(Callee) && 4612 !isBLACompatibleAddress(Callee, DAG)) 4613 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 4614 PPC::R12), Callee)); 4615 4616 // Build a sequence of copy-to-reg nodes chained together with token chain 4617 // and flag operands which copy the outgoing args into the appropriate regs. 
4618 SDValue InFlag; 4619 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4620 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4621 RegsToPass[i].second, InFlag); 4622 InFlag = Chain.getValue(1); 4623 } 4624 4625 if (isTailCall) 4626 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 4627 FPOp, true, TailCallArguments); 4628 4629 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 4630 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4631 Ins, InVals); 4632 } 4633 4634 bool 4635 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 4636 MachineFunction &MF, bool isVarArg, 4637 const SmallVectorImpl<ISD::OutputArg> &Outs, 4638 LLVMContext &Context) const { 4639 SmallVector<CCValAssign, 16> RVLocs; 4640 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), 4641 RVLocs, Context); 4642 return CCInfo.CheckReturn(Outs, RetCC_PPC); 4643 } 4644 4645 SDValue 4646 PPCTargetLowering::LowerReturn(SDValue Chain, 4647 CallingConv::ID CallConv, bool isVarArg, 4648 const SmallVectorImpl<ISD::OutputArg> &Outs, 4649 const SmallVectorImpl<SDValue> &OutVals, 4650 SDLoc dl, SelectionDAG &DAG) const { 4651 4652 SmallVector<CCValAssign, 16> RVLocs; 4653 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4654 getTargetMachine(), RVLocs, *DAG.getContext()); 4655 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 4656 4657 SDValue Flag; 4658 SmallVector<SDValue, 4> RetOps(1, Chain); 4659 4660 // Copy the result values into the output registers. 4661 for (unsigned i = 0; i != RVLocs.size(); ++i) { 4662 CCValAssign &VA = RVLocs[i]; 4663 assert(VA.isRegLoc() && "Can only return in registers!"); 4664 4665 SDValue Arg = OutVals[i]; 4666 4667 switch (VA.getLocInfo()) { 4668 default: llvm_unreachable("Unknown loc info!"); 4669 case CCValAssign::Full: break; 4670 case CCValAssign::AExt: 4671 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 4672 break; 4673 case CCValAssign::ZExt: 4674 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 4675 break; 4676 case CCValAssign::SExt: 4677 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 4678 break; 4679 } 4680 4681 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 4682 Flag = Chain.getValue(1); 4683 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 4684 } 4685 4686 RetOps[0] = Chain; // Update chain. 4687 4688 // Add the flag if we have it. 4689 if (Flag.getNode()) 4690 RetOps.push_back(Flag); 4691 4692 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, 4693 &RetOps[0], RetOps.size()); 4694 } 4695 4696 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, 4697 const PPCSubtarget &Subtarget) const { 4698 // When we pop the dynamic allocation we need to restore the SP link. 4699 SDLoc dl(Op); 4700 4701 // Get the corect type for pointers. 4702 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4703 4704 // Construct the stack pointer operand. 4705 bool isPPC64 = Subtarget.isPPC64(); 4706 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 4707 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 4708 4709 // Get the operands for the STACKRESTORE. 4710 SDValue Chain = Op.getOperand(0); 4711 SDValue SaveSP = Op.getOperand(1); 4712 4713 // Load the old link SP. 4714 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, 4715 MachinePointerInfo(), 4716 false, false, false, 0); 4717 4718 // Restore the stack pointer. 4719 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 4720 4721 // Store the old link SP. 
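// Roughly, on a 64-bit target the code in this function amounts to
//   ld   r0, 0(r1)      (load the old back-chain word)
//   mr   r1, <SaveSP>   (restore the stack pointer)
//   std  r0, 0(r1)      (re-establish the back chain at the new SP)
// This is only a sketch of the intent; the actual instructions are chosen
// later by selection.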
4722 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
4723 false, false, 0);
4724 }
4725
4726
4727
4728 SDValue
4729 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
4730 MachineFunction &MF = DAG.getMachineFunction();
4731 bool isPPC64 = PPCSubTarget.isPPC64();
4732 bool isDarwinABI = PPCSubTarget.isDarwinABI();
4733 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4734
4735 // Get the current return address save index. The frame object it refers
4736 // to lives at the ABI's link register save offset.
4737 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4738 int RASI = FI->getReturnAddrSaveIndex();
4739
4740 // If the return address save index hasn't been defined yet, create it.
4741 if (!RASI) {
4742 // Find out the fixed offset of the return address save area.
4743 int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
4744 // Allocate the frame index for the return address save area.
4745 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
4746 // Save the result.
4747 FI->setReturnAddrSaveIndex(RASI);
4748 }
4749 return DAG.getFrameIndex(RASI, PtrVT);
4750 }
4751
4752 SDValue
4753 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
4754 MachineFunction &MF = DAG.getMachineFunction();
4755 bool isPPC64 = PPCSubTarget.isPPC64();
4756 bool isDarwinABI = PPCSubTarget.isDarwinABI();
4757 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4758
4759 // Get the current frame pointer save index. The users of this index will be
4760 // primarily DYNALLOC instructions.
4761 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4762 int FPSI = FI->getFramePointerSaveIndex();
4763
4764 // If the frame pointer save index hasn't been defined yet, create it.
4765 if (!FPSI) {
4766 // Find out the fixed offset of the frame pointer save area.
4767 int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
4768 isDarwinABI);
4769
4770 // Allocate the frame index for the frame pointer save area.
4771 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
4772 // Save the result.
4773 FI->setFramePointerSaveIndex(FPSI);
4774 }
4775 return DAG.getFrameIndex(FPSI, PtrVT);
4776 }
4777
4778 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4779 SelectionDAG &DAG,
4780 const PPCSubtarget &Subtarget) const {
4781 // Get the inputs.
4782 SDValue Chain = Op.getOperand(0);
4783 SDValue Size = Op.getOperand(1);
4784 SDLoc dl(Op);
4785
4786 // Get the correct type for pointers.
4787 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4788 // Negate the size.
4789 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
4790 DAG.getConstant(0, PtrVT), Size);
4791 // Construct a node for the frame pointer save index.
4792 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
4793 // Build a DYNALLOC node.
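// Rough sketch: for a source-level alloca(n) this builds
//   DYNALLOC(Chain, 0 - n, FPSIdx)
// The size is negated because the PowerPC stack grows downward, and FPSIdx
// names the frame pointer save slot the later expansion may need. The
// DYNALLOC pseudo is expanded elsewhere into code that adjusts r1 by the
// negated size while keeping the back chain valid (e.g. via stwux/stdux);
// that expansion is outside this function.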
4794 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 4795 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 4796 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); 4797 } 4798 4799 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 4800 SelectionDAG &DAG) const { 4801 SDLoc DL(Op); 4802 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 4803 DAG.getVTList(MVT::i32, MVT::Other), 4804 Op.getOperand(0), Op.getOperand(1)); 4805 } 4806 4807 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 4808 SelectionDAG &DAG) const { 4809 SDLoc DL(Op); 4810 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 4811 Op.getOperand(0), Op.getOperand(1)); 4812 } 4813 4814 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 4815 assert(Op.getValueType() == MVT::i1 && 4816 "Custom lowering only for i1 loads"); 4817 4818 // First, load 8 bits into 32 bits, then truncate to 1 bit. 4819 4820 SDLoc dl(Op); 4821 LoadSDNode *LD = cast<LoadSDNode>(Op); 4822 4823 SDValue Chain = LD->getChain(); 4824 SDValue BasePtr = LD->getBasePtr(); 4825 MachineMemOperand *MMO = LD->getMemOperand(); 4826 4827 SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, 4828 BasePtr, MVT::i8, MMO); 4829 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 4830 4831 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 4832 return DAG.getMergeValues(Ops, 2, dl); 4833 } 4834 4835 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 4836 assert(Op.getOperand(1).getValueType() == MVT::i1 && 4837 "Custom lowering only for i1 stores"); 4838 4839 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 4840 4841 SDLoc dl(Op); 4842 StoreSDNode *ST = cast<StoreSDNode>(Op); 4843 4844 SDValue Chain = ST->getChain(); 4845 SDValue BasePtr = ST->getBasePtr(); 4846 SDValue Value = ST->getValue(); 4847 MachineMemOperand *MMO = ST->getMemOperand(); 4848 4849 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); 4850 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 4851 } 4852 4853 // FIXME: Remove this once the ANDI glue bug is fixed: 4854 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 4855 assert(Op.getValueType() == MVT::i1 && 4856 "Custom lowering only for i1 results"); 4857 4858 SDLoc DL(Op); 4859 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 4860 Op.getOperand(0)); 4861 } 4862 4863 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 4864 /// possible. 4865 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 4866 // Not FP? Not a fsel. 4867 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 4868 !Op.getOperand(2).getValueType().isFloatingPoint()) 4869 return Op; 4870 4871 // We might be able to do better than this under some circumstances, but in 4872 // general, fsel-based lowering of select is a finite-math-only optimization. 4873 // For more information, see section F.3 of the 2.06 ISA specification. 
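// For example, with both options set, (select_cc x, +0.0, a, b, setge) can
// become (fsel x, a, b): as used below, PPCISD::FSEL returns its second
// operand when the first operand is greater than or equal to zero and its
// third operand otherwise. Without no-NaNs/no-infs this is not IEEE-safe,
// which is exactly what the guard below enforces.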
4874 if (!DAG.getTarget().Options.NoInfsFPMath ||
4875 !DAG.getTarget().Options.NoNaNsFPMath)
4876 return Op;
4877
4878 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4879
4880 EVT ResVT = Op.getValueType();
4881 EVT CmpVT = Op.getOperand(0).getValueType();
4882 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4883 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
4884 SDLoc dl(Op);
4885
4886 // If the RHS of the comparison is a 0.0, we don't need to do the
4887 // subtraction at all.
4888 SDValue Sel1;
4889 if (isFloatingPointZero(RHS))
4890 switch (CC) {
4891 default: break; // SETUO etc aren't handled by fsel.
4892 case ISD::SETNE:
4893 std::swap(TV, FV);
4894 case ISD::SETEQ:
4895 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
4896 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4897 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
4898 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
4899 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
4900 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
4901 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
4902 case ISD::SETULT:
4903 case ISD::SETLT:
4904 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
4905 case ISD::SETOGE:
4906 case ISD::SETGE:
4907 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
4908 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4909 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
4910 case ISD::SETUGT:
4911 case ISD::SETGT:
4912 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
4913 case ISD::SETOLE:
4914 case ISD::SETLE:
4915 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
4916 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4917 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
4918 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
4919 }
4920
4921 SDValue Cmp;
4922 switch (CC) {
4923 default: break; // SETUO etc aren't handled by fsel.
4924 case ISD::SETNE: 4925 std::swap(TV, FV); 4926 case ISD::SETEQ: 4927 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4928 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4929 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4930 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4931 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 4932 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 4933 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4934 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 4935 case ISD::SETULT: 4936 case ISD::SETLT: 4937 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4938 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4939 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4940 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 4941 case ISD::SETOGE: 4942 case ISD::SETGE: 4943 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4944 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4945 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4946 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4947 case ISD::SETUGT: 4948 case ISD::SETGT: 4949 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 4950 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4951 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4952 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 4953 case ISD::SETOLE: 4954 case ISD::SETLE: 4955 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 4956 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4957 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4958 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4959 } 4960 return Op; 4961 } 4962 4963 // FIXME: Split this code up when LegalizeDAGTypes lands. 4964 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 4965 SDLoc dl) const { 4966 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 4967 SDValue Src = Op.getOperand(0); 4968 if (Src.getValueType() == MVT::f32) 4969 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 4970 4971 SDValue Tmp; 4972 switch (Op.getSimpleValueType().SimpleTy) { 4973 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 4974 case MVT::i32: 4975 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : 4976 (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ : 4977 PPCISD::FCTIDZ), 4978 dl, MVT::f64, Src); 4979 break; 4980 case MVT::i64: 4981 assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) && 4982 "i64 FP_TO_UINT is supported only with FPCVT"); 4983 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 4984 PPCISD::FCTIDUZ, 4985 dl, MVT::f64, Src); 4986 break; 4987 } 4988 4989 // Convert the FP value to an int value through memory. 4990 bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && 4991 (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); 4992 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 4993 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 4994 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); 4995 4996 // Emit a store to the stack slot. 
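// Sketch of the overall shape: converting f64 to a signed i32 emits FCTIWZ,
// whose integer result occupies the low 32 bits of an f64 register. With
// STFIWX available, just those 32 bits are stored to the stack slot;
// otherwise the full 8-byte double is stored and, these targets being
// big-endian, the i32 is reloaded from offset 4 of the slot -- that is the
// purpose of the 4-byte bias applied below.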
4997 SDValue Chain; 4998 if (i32Stack) { 4999 MachineFunction &MF = DAG.getMachineFunction(); 5000 MachineMemOperand *MMO = 5001 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 5002 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 5003 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 5004 DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), 5005 MVT::i32, MMO); 5006 } else 5007 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 5008 MPI, false, false, 0); 5009 5010 // Result is a load from the stack slot. If loading 4 bytes, make sure to 5011 // add in a bias. 5012 if (Op.getValueType() == MVT::i32 && !i32Stack) { 5013 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 5014 DAG.getConstant(4, FIPtr.getValueType())); 5015 MPI = MachinePointerInfo(); 5016 } 5017 5018 return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, 5019 false, false, false, 0); 5020 } 5021 5022 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 5023 SelectionDAG &DAG) const { 5024 SDLoc dl(Op); 5025 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 5026 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 5027 return SDValue(); 5028 5029 if (Op.getOperand(0).getValueType() == MVT::i1) 5030 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 5031 DAG.getConstantFP(1.0, Op.getValueType()), 5032 DAG.getConstantFP(0.0, Op.getValueType())); 5033 5034 assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && 5035 "UINT_TO_FP is supported only with FPCVT"); 5036 5037 // If we have FCFIDS, then use it when converting to single-precision. 5038 // Otherwise, convert to double-precision and then round. 5039 unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 5040 (Op.getOpcode() == ISD::UINT_TO_FP ? 5041 PPCISD::FCFIDUS : PPCISD::FCFIDS) : 5042 (Op.getOpcode() == ISD::UINT_TO_FP ? 5043 PPCISD::FCFIDU : PPCISD::FCFID); 5044 MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 5045 MVT::f32 : MVT::f64; 5046 5047 if (Op.getOperand(0).getValueType() == MVT::i64) { 5048 SDValue SINT = Op.getOperand(0); 5049 // When converting to single-precision, we actually need to convert 5050 // to double-precision first and then round to single-precision. 5051 // To avoid double-rounding effects during that operation, we have 5052 // to prepare the input operand. Bits that might be truncated when 5053 // converting to double-precision are replaced by a bit that won't 5054 // be lost at this stage, but is below the single-precision rounding 5055 // position. 5056 // 5057 // However, if -enable-unsafe-fp-math is in effect, accept double 5058 // rounding to avoid the extra overhead. 5059 if (Op.getValueType() == MVT::f32 && 5060 !PPCSubTarget.hasFPCVT() && 5061 !DAG.getTarget().Options.UnsafeFPMath) { 5062 5063 // Twiddle input to make sure the low 11 bits are zero. (If this 5064 // is the case, we are guaranteed the value will fit into the 53 bit 5065 // mantissa of an IEEE double-precision value without rounding.) 5066 // If any of those low 11 bits were not zero originally, make sure 5067 // bit 12 (value 2048) is set instead, so that the final rounding 5068 // to single-precision gets the correct result. 
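// Numeric example of the twiddling below: if the low 11 bits of SINT are
// 0x005, then (SINT & 2047) == 5, adding 2047 gives 2052, which has bit 11
// set; OR-ing with SINT and masking with -2048 leaves SINT with its low 11
// bits replaced by a single sticky bit at position 11. If the low 11 bits
// were already zero the same computation reproduces SINT exactly, so no
// error is introduced in that case.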
5069 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 5070 SINT, DAG.getConstant(2047, MVT::i64)); 5071 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 5072 Round, DAG.getConstant(2047, MVT::i64)); 5073 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 5074 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 5075 Round, DAG.getConstant(-2048, MVT::i64)); 5076 5077 // However, we cannot use that value unconditionally: if the magnitude 5078 // of the input value is small, the bit-twiddling we did above might 5079 // end up visibly changing the output. Fortunately, in that case, we 5080 // don't need to twiddle bits since the original input will convert 5081 // exactly to double-precision floating-point already. Therefore, 5082 // construct a conditional to use the original value if the top 11 5083 // bits are all sign-bit copies, and use the rounded value computed 5084 // above otherwise. 5085 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 5086 SINT, DAG.getConstant(53, MVT::i32)); 5087 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 5088 Cond, DAG.getConstant(1, MVT::i64)); 5089 Cond = DAG.getSetCC(dl, MVT::i32, 5090 Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); 5091 5092 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 5093 } 5094 5095 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 5096 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 5097 5098 if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) 5099 FP = DAG.getNode(ISD::FP_ROUND, dl, 5100 MVT::f32, FP, DAG.getIntPtrConstant(0)); 5101 return FP; 5102 } 5103 5104 assert(Op.getOperand(0).getValueType() == MVT::i32 && 5105 "Unhandled INT_TO_FP type in custom expander!"); 5106 // Since we only generate this in 64-bit mode, we can take advantage of 5107 // 64-bit registers. In particular, sign extend the input value into the 5108 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 5109 // then lfd it and fcfid it. 5110 MachineFunction &MF = DAG.getMachineFunction(); 5111 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 5112 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5113 5114 SDValue Ld; 5115 if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { 5116 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 5117 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5118 5119 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 5120 MachinePointerInfo::getFixedStack(FrameIdx), 5121 false, false, 0); 5122 5123 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 5124 "Expected an i32 store"); 5125 MachineMemOperand *MMO = 5126 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), 5127 MachineMemOperand::MOLoad, 4, 4); 5128 SDValue Ops[] = { Store, FIdx }; 5129 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 5130 PPCISD::LFIWZX : PPCISD::LFIWAX, 5131 dl, DAG.getVTList(MVT::f64, MVT::Other), 5132 Ops, 2, MVT::i32, MMO); 5133 } else { 5134 assert(PPCSubTarget.isPPC64() && 5135 "i32->FP without LFIWAX supported only on PPC64"); 5136 5137 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 5138 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5139 5140 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 5141 Op.getOperand(0)); 5142 5143 // STD the extended value into the stack slot. 5144 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, 5145 MachinePointerInfo::getFixedStack(FrameIdx), 5146 false, false, 0); 5147 5148 // Load the value as a double. 
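// Instruction-level sketch of this fallback path (no LFIWAX/FPCVT, PPC64
// only): extsw to sign-extend the i32, std of the whole 64-bit value into
// the stack temporary, lfd to reinterpret it as an f64, then fcfid. If the
// requested result is f32 and FPCVT is unavailable, the code after this
// block appends an FP_ROUND to single precision.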
5149 Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, 5150 MachinePointerInfo::getFixedStack(FrameIdx), 5151 false, false, false, 0); 5152 } 5153 5154 // FCFID it and return it. 5155 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 5156 if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) 5157 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); 5158 return FP; 5159 } 5160 5161 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 5162 SelectionDAG &DAG) const { 5163 SDLoc dl(Op); 5164 /* 5165 The rounding mode is in bits 30:31 of FPSR, and has the following 5166 settings: 5167 00 Round to nearest 5168 01 Round to 0 5169 10 Round to +inf 5170 11 Round to -inf 5171 5172 FLT_ROUNDS, on the other hand, expects the following: 5173 -1 Undefined 5174 0 Round to 0 5175 1 Round to nearest 5176 2 Round to +inf 5177 3 Round to -inf 5178 5179 To perform the conversion, we do: 5180 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 5181 */ 5182 5183 MachineFunction &MF = DAG.getMachineFunction(); 5184 EVT VT = Op.getValueType(); 5185 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5186 SDValue MFFSreg, InFlag; 5187 5188 // Save FP Control Word to register 5189 EVT NodeTys[] = { 5190 MVT::f64, // return register 5191 MVT::Glue // unused in this context 5192 }; 5193 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); 5194 5195 // Save FP register to stack slot 5196 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); 5197 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 5198 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, 5199 StackSlot, MachinePointerInfo(), false, false,0); 5200 5201 // Load FP Control Word from low 32 bits of stack slot. 5202 SDValue Four = DAG.getConstant(4, PtrVT); 5203 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 5204 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), 5205 false, false, false, 0); 5206 5207 // Transform as necessary 5208 SDValue CWD1 = 5209 DAG.getNode(ISD::AND, dl, MVT::i32, 5210 CWD, DAG.getConstant(3, MVT::i32)); 5211 SDValue CWD2 = 5212 DAG.getNode(ISD::SRL, dl, MVT::i32, 5213 DAG.getNode(ISD::AND, dl, MVT::i32, 5214 DAG.getNode(ISD::XOR, dl, MVT::i32, 5215 CWD, DAG.getConstant(3, MVT::i32)), 5216 DAG.getConstant(3, MVT::i32)), 5217 DAG.getConstant(1, MVT::i32)); 5218 5219 SDValue RetVal = 5220 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 5221 5222 return DAG.getNode((VT.getSizeInBits() < 16 ? 5223 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 5224 } 5225 5226 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 5227 EVT VT = Op.getValueType(); 5228 unsigned BitWidth = VT.getSizeInBits(); 5229 SDLoc dl(Op); 5230 assert(Op.getNumOperands() == 3 && 5231 VT == Op.getOperand(1).getValueType() && 5232 "Unexpected SHL!"); 5233 5234 // Expand into a bunch of logical ops. Note that these ops 5235 // depend on the PPC behavior for oversized shift amounts. 
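// Concretely, for the two-register value {Hi,Lo} shifted left by Amt, the
// expansion below computes
//   OutLo = Lo << Amt
//   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
// At most one of the last two terms is nonzero, because the PPC shift
// instructions yield zero for shift amounts in [BitWidth, 2*BitWidth), which
// is precisely the oversized-shift behavior the comment above relies on.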
5236 SDValue Lo = Op.getOperand(0); 5237 SDValue Hi = Op.getOperand(1); 5238 SDValue Amt = Op.getOperand(2); 5239 EVT AmtVT = Amt.getValueType(); 5240 5241 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5242 DAG.getConstant(BitWidth, AmtVT), Amt); 5243 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 5244 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 5245 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 5246 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5247 DAG.getConstant(-BitWidth, AmtVT)); 5248 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 5249 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5250 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 5251 SDValue OutOps[] = { OutLo, OutHi }; 5252 return DAG.getMergeValues(OutOps, 2, dl); 5253 } 5254 5255 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 5256 EVT VT = Op.getValueType(); 5257 SDLoc dl(Op); 5258 unsigned BitWidth = VT.getSizeInBits(); 5259 assert(Op.getNumOperands() == 3 && 5260 VT == Op.getOperand(1).getValueType() && 5261 "Unexpected SRL!"); 5262 5263 // Expand into a bunch of logical ops. Note that these ops 5264 // depend on the PPC behavior for oversized shift amounts. 5265 SDValue Lo = Op.getOperand(0); 5266 SDValue Hi = Op.getOperand(1); 5267 SDValue Amt = Op.getOperand(2); 5268 EVT AmtVT = Amt.getValueType(); 5269 5270 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5271 DAG.getConstant(BitWidth, AmtVT), Amt); 5272 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5273 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5274 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5275 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5276 DAG.getConstant(-BitWidth, AmtVT)); 5277 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 5278 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5279 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 5280 SDValue OutOps[] = { OutLo, OutHi }; 5281 return DAG.getMergeValues(OutOps, 2, dl); 5282 } 5283 5284 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 5285 SDLoc dl(Op); 5286 EVT VT = Op.getValueType(); 5287 unsigned BitWidth = VT.getSizeInBits(); 5288 assert(Op.getNumOperands() == 3 && 5289 VT == Op.getOperand(1).getValueType() && 5290 "Unexpected SRA!"); 5291 5292 // Expand into a bunch of logical ops, followed by a select_cc. 5293 SDValue Lo = Op.getOperand(0); 5294 SDValue Hi = Op.getOperand(1); 5295 SDValue Amt = Op.getOperand(2); 5296 EVT AmtVT = Amt.getValueType(); 5297 5298 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5299 DAG.getConstant(BitWidth, AmtVT), Amt); 5300 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5301 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5302 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5303 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5304 DAG.getConstant(-BitWidth, AmtVT)); 5305 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 5306 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 5307 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), 5308 Tmp4, Tmp6, ISD::SETLE); 5309 SDValue OutOps[] = { OutLo, OutHi }; 5310 return DAG.getMergeValues(OutOps, 2, dl); 5311 } 5312 5313 //===----------------------------------------------------------------------===// 5314 // Vector related lowering. 5315 // 5316 5317 /// BuildSplatI - Build a canonical splati of Val with an element size of 5318 /// SplatSize. 
Cast the result to VT. 5319 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 5320 SelectionDAG &DAG, SDLoc dl) { 5321 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 5322 5323 static const EVT VTys[] = { // canonical VT to use for each size. 5324 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 5325 }; 5326 5327 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 5328 5329 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 5330 if (Val == -1) 5331 SplatSize = 1; 5332 5333 EVT CanonicalVT = VTys[SplatSize-1]; 5334 5335 // Build a canonical splat for this value. 5336 SDValue Elt = DAG.getConstant(Val, MVT::i32); 5337 SmallVector<SDValue, 8> Ops; 5338 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 5339 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, 5340 &Ops[0], Ops.size()); 5341 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); 5342 } 5343 5344 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 5345 /// specified intrinsic ID. 5346 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, 5347 SelectionDAG &DAG, SDLoc dl, 5348 EVT DestVT = MVT::Other) { 5349 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 5350 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5351 DAG.getConstant(IID, MVT::i32), Op); 5352 } 5353 5354 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 5355 /// specified intrinsic ID. 5356 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 5357 SelectionDAG &DAG, SDLoc dl, 5358 EVT DestVT = MVT::Other) { 5359 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 5360 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5361 DAG.getConstant(IID, MVT::i32), LHS, RHS); 5362 } 5363 5364 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 5365 /// specified intrinsic ID. 5366 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 5367 SDValue Op2, SelectionDAG &DAG, 5368 SDLoc dl, EVT DestVT = MVT::Other) { 5369 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 5370 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5371 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 5372 } 5373 5374 5375 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 5376 /// amount. The result has the specified value type. 5377 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 5378 EVT VT, SelectionDAG &DAG, SDLoc dl) { 5379 // Force LHS/RHS to be the right type. 5380 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 5381 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 5382 5383 int Ops[16]; 5384 for (unsigned i = 0; i != 16; ++i) 5385 Ops[i] = i + Amt; 5386 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 5387 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5388 } 5389 5390 // If this is a case we can't handle, return null and let the default 5391 // expansion code take care of it. If we CAN select this case, and if it 5392 // selects to a single instruction, return Op. Otherwise, if we can codegen 5393 // this case more efficiently than a constant pool load, lower it to the 5394 // sequence of ops that should be used. 5395 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 5396 SelectionDAG &DAG) const { 5397 SDLoc dl(Op); 5398 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 5399 assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 5400 5401 // Check if this is a splat of a constant value. 
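// Illustrative case for the analysis that follows: the constant vector
// <i32 5, i32 5, i32 5, i32 5> is a splat with SplatBitSize == 32 whose
// sign-extended value, 5, lies in [-16,15], so it is materialized with a
// single vspltisw via BuildSplatI. Constants outside that range fall
// through to the multi-instruction cases further down.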
5402 APInt APSplatBits, APSplatUndef; 5403 unsigned SplatBitSize; 5404 bool HasAnyUndefs; 5405 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 5406 HasAnyUndefs, 0, true) || SplatBitSize > 32) 5407 return SDValue(); 5408 5409 unsigned SplatBits = APSplatBits.getZExtValue(); 5410 unsigned SplatUndef = APSplatUndef.getZExtValue(); 5411 unsigned SplatSize = SplatBitSize / 8; 5412 5413 // First, handle single instruction cases. 5414 5415 // All zeros? 5416 if (SplatBits == 0) { 5417 // Canonicalize all zero vectors to be v4i32. 5418 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 5419 SDValue Z = DAG.getConstant(0, MVT::i32); 5420 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 5421 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 5422 } 5423 return Op; 5424 } 5425 5426 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 5427 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 5428 (32-SplatBitSize)); 5429 if (SextVal >= -16 && SextVal <= 15) 5430 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 5431 5432 5433 // Two instruction sequences. 5434 5435 // If this value is in the range [-32,30] and is even, use: 5436 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 5437 // If this value is in the range [17,31] and is odd, use: 5438 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 5439 // If this value is in the range [-31,-17] and is odd, use: 5440 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 5441 // Note the last two are three-instruction sequences. 5442 if (SextVal >= -32 && SextVal <= 31) { 5443 // To avoid having these optimizations undone by constant folding, 5444 // we convert to a pseudo that will be expanded later into one of 5445 // the above forms. 5446 SDValue Elt = DAG.getConstant(SextVal, MVT::i32); 5447 EVT VT = Op.getValueType(); 5448 int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); 5449 SDValue EltSize = DAG.getConstant(Size, MVT::i32); 5450 return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 5451 } 5452 5453 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 5454 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 5455 // for fneg/fabs. 5456 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 5457 // Make -1 and vspltisw -1: 5458 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 5459 5460 // Make the VSLW intrinsic, computing 0x8000_0000. 5461 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 5462 OnesV, DAG, dl); 5463 5464 // xor by OnesV to invert it. 5465 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 5466 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5467 } 5468 5469 // Check to see if this is a wide variety of vsplti*, binop self cases. 5470 static const signed char SplatCsts[] = { 5471 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 5472 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 5473 }; 5474 5475 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 5476 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 5477 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 5478 int i = SplatCsts[idx]; 5479 5480 // Figure out what shift amount will be used by altivec if shifted by i in 5481 // this splat size. 5482 unsigned TypeShiftAmt = i & (SplatBitSize-1); 5483 5484 // vsplti + shl self. 
5485 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 5486 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5487 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5488 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 5489 Intrinsic::ppc_altivec_vslw 5490 }; 5491 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5492 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5493 } 5494 5495 // vsplti + srl self. 5496 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5497 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5498 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5499 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 5500 Intrinsic::ppc_altivec_vsrw 5501 }; 5502 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5503 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5504 } 5505 5506 // vsplti + sra self. 5507 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5508 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5509 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5510 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 5511 Intrinsic::ppc_altivec_vsraw 5512 }; 5513 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5514 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5515 } 5516 5517 // vsplti + rol self. 5518 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 5519 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 5520 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5521 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5522 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 5523 Intrinsic::ppc_altivec_vrlw 5524 }; 5525 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5526 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5527 } 5528 5529 // t = vsplti c, result = vsldoi t, t, 1 5530 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 5531 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5532 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 5533 } 5534 // t = vsplti c, result = vsldoi t, t, 2 5535 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 5536 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5537 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 5538 } 5539 // t = vsplti c, result = vsldoi t, t, 3 5540 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 5541 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5542 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 5543 } 5544 } 5545 5546 return SDValue(); 5547 } 5548 5549 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 5550 /// the specified operations to build the shuffle. 
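// A note on the PFEntry encoding, as decoded below and in
// LowerVECTOR_SHUFFLE: bits 31:30 hold the cost, bits 29:26 the operation
// (one of the OP_* values), bits 25:13 the left operand id and bits 12:0
// the right operand id. Each id encodes four element indices in base 9,
// with 8 meaning undef; e.g. the id (1*9+2)*9+3 tested below is the
// identity selection <0,1,2,3> of the LHS.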
5551 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 5552 SDValue RHS, SelectionDAG &DAG, 5553 SDLoc dl) { 5554 unsigned OpNum = (PFEntry >> 26) & 0x0F; 5555 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 5556 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 5557 5558 enum { 5559 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 5560 OP_VMRGHW, 5561 OP_VMRGLW, 5562 OP_VSPLTISW0, 5563 OP_VSPLTISW1, 5564 OP_VSPLTISW2, 5565 OP_VSPLTISW3, 5566 OP_VSLDOI4, 5567 OP_VSLDOI8, 5568 OP_VSLDOI12 5569 }; 5570 5571 if (OpNum == OP_COPY) { 5572 if (LHSID == (1*9+2)*9+3) return LHS; 5573 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 5574 return RHS; 5575 } 5576 5577 SDValue OpLHS, OpRHS; 5578 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 5579 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 5580 5581 int ShufIdxs[16]; 5582 switch (OpNum) { 5583 default: llvm_unreachable("Unknown i32 permute!"); 5584 case OP_VMRGHW: 5585 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 5586 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 5587 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 5588 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 5589 break; 5590 case OP_VMRGLW: 5591 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 5592 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 5593 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 5594 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 5595 break; 5596 case OP_VSPLTISW0: 5597 for (unsigned i = 0; i != 16; ++i) 5598 ShufIdxs[i] = (i&3)+0; 5599 break; 5600 case OP_VSPLTISW1: 5601 for (unsigned i = 0; i != 16; ++i) 5602 ShufIdxs[i] = (i&3)+4; 5603 break; 5604 case OP_VSPLTISW2: 5605 for (unsigned i = 0; i != 16; ++i) 5606 ShufIdxs[i] = (i&3)+8; 5607 break; 5608 case OP_VSPLTISW3: 5609 for (unsigned i = 0; i != 16; ++i) 5610 ShufIdxs[i] = (i&3)+12; 5611 break; 5612 case OP_VSLDOI4: 5613 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 5614 case OP_VSLDOI8: 5615 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 5616 case OP_VSLDOI12: 5617 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 5618 } 5619 EVT VT = OpLHS.getValueType(); 5620 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 5621 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 5622 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 5623 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5624 } 5625 5626 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 5627 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 5628 /// return the code it can be lowered into. Worst case, it can always be 5629 /// lowered into a vperm. 5630 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 5631 SelectionDAG &DAG) const { 5632 SDLoc dl(Op); 5633 SDValue V1 = Op.getOperand(0); 5634 SDValue V2 = Op.getOperand(1); 5635 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 5636 EVT VT = Op.getValueType(); 5637 5638 // Cases that are handled by instructions that take permute immediates 5639 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 5640 // selected by the instruction selector. 
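// For instance, a v4i32 shuffle of V1 with an undef V2 and the mask
// <2,2,2,2> is a 4-byte splat of element 2; isSplatShuffleMask(SVOp, 4)
// accepts it below and the node is returned unchanged so the selector can
// emit a single vspltw. (The particular mask is just an example.)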
5641 if (V2.getOpcode() == ISD::UNDEF) { 5642 if (PPC::isSplatShuffleMask(SVOp, 1) || 5643 PPC::isSplatShuffleMask(SVOp, 2) || 5644 PPC::isSplatShuffleMask(SVOp, 4) || 5645 PPC::isVPKUWUMShuffleMask(SVOp, true) || 5646 PPC::isVPKUHUMShuffleMask(SVOp, true) || 5647 PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || 5648 PPC::isVMRGLShuffleMask(SVOp, 1, true) || 5649 PPC::isVMRGLShuffleMask(SVOp, 2, true) || 5650 PPC::isVMRGLShuffleMask(SVOp, 4, true) || 5651 PPC::isVMRGHShuffleMask(SVOp, 1, true) || 5652 PPC::isVMRGHShuffleMask(SVOp, 2, true) || 5653 PPC::isVMRGHShuffleMask(SVOp, 4, true)) { 5654 return Op; 5655 } 5656 } 5657 5658 // Altivec has a variety of "shuffle immediates" that take two vector inputs 5659 // and produce a fixed permutation. If any of these match, do not lower to 5660 // VPERM. 5661 if (PPC::isVPKUWUMShuffleMask(SVOp, false) || 5662 PPC::isVPKUHUMShuffleMask(SVOp, false) || 5663 PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || 5664 PPC::isVMRGLShuffleMask(SVOp, 1, false) || 5665 PPC::isVMRGLShuffleMask(SVOp, 2, false) || 5666 PPC::isVMRGLShuffleMask(SVOp, 4, false) || 5667 PPC::isVMRGHShuffleMask(SVOp, 1, false) || 5668 PPC::isVMRGHShuffleMask(SVOp, 2, false) || 5669 PPC::isVMRGHShuffleMask(SVOp, 4, false)) 5670 return Op; 5671 5672 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 5673 // perfect shuffle table to emit an optimal matching sequence. 5674 ArrayRef<int> PermMask = SVOp->getMask(); 5675 5676 unsigned PFIndexes[4]; 5677 bool isFourElementShuffle = true; 5678 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 5679 unsigned EltNo = 8; // Start out undef. 5680 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 5681 if (PermMask[i*4+j] < 0) 5682 continue; // Undef, ignore it. 5683 5684 unsigned ByteSource = PermMask[i*4+j]; 5685 if ((ByteSource & 3) != j) { 5686 isFourElementShuffle = false; 5687 break; 5688 } 5689 5690 if (EltNo == 8) { 5691 EltNo = ByteSource/4; 5692 } else if (EltNo != ByteSource/4) { 5693 isFourElementShuffle = false; 5694 break; 5695 } 5696 } 5697 PFIndexes[i] = EltNo; 5698 } 5699 5700 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 5701 // perfect shuffle vector to determine if it is cost effective to do this as 5702 // discrete instructions, or whether we should use a vperm. 5703 if (isFourElementShuffle) { 5704 // Compute the index in the perfect shuffle table. 5705 unsigned PFTableIndex = 5706 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5707 5708 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5709 unsigned Cost = (PFEntry >> 30); 5710 5711 // Determining when to avoid vperm is tricky. Many things affect the cost 5712 // of vperm, particularly how many times the perm mask needs to be computed. 5713 // For example, if the perm mask can be hoisted out of a loop or is already 5714 // used (perhaps because there are multiple permutes with the same shuffle 5715 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 5716 // the loop requires an extra register. 5717 // 5718 // As a compromise, we only emit discrete instructions if the shuffle can be 5719 // generated in 3 or fewer operations. When we have loop information 5720 // available, if this block is within a loop, we should avoid using vperm 5721 // for 3-operation perms and use a constant pool load instead. 
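// When the perfect-shuffle path below is not taken, the element-granular
// mask is widened to a byte mask for vperm. Illustrative case: a v4i32 mask
// <0,5,2,7> expands to the byte indices 0-3, 20-23, 8-11 and 28-31, which
// are materialized as a v16i8 BUILD_VECTOR and fed to PPCISD::VPERM.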
5722 if (Cost < 3) 5723 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5724 } 5725 5726 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 5727 // vector that will get spilled to the constant pool. 5728 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 5729 5730 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 5731 // that it is in input element units, not in bytes. Convert now. 5732 EVT EltVT = V1.getValueType().getVectorElementType(); 5733 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 5734 5735 SmallVector<SDValue, 16> ResultMask; 5736 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 5737 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 5738 5739 for (unsigned j = 0; j != BytesPerElement; ++j) 5740 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 5741 MVT::i32)); 5742 } 5743 5744 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 5745 &ResultMask[0], ResultMask.size()); 5746 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); 5747 } 5748 5749 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 5750 /// altivec comparison. If it is, return true and fill in Opc/isDot with 5751 /// information about the intrinsic. 5752 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 5753 bool &isDot) { 5754 unsigned IntrinsicID = 5755 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 5756 CompareOpc = -1; 5757 isDot = false; 5758 switch (IntrinsicID) { 5759 default: return false; 5760 // Comparison predicates. 5761 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 5762 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 5763 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 5764 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 5765 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 5766 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 5767 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 5768 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 5769 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 5770 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 5771 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 5772 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 5773 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 5774 5775 // Normal Comparisons. 
5776 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 5777 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 5778 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 5779 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 5780 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 5781 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 5782 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 5783 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 5784 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 5785 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 5786 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 5787 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 5788 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 5789 } 5790 return true; 5791 } 5792 5793 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 5794 /// lower, do it, otherwise return null. 5795 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 5796 SelectionDAG &DAG) const { 5797 // If this is a lowered altivec predicate compare, CompareOpc is set to the 5798 // opcode number of the comparison. 5799 SDLoc dl(Op); 5800 int CompareOpc; 5801 bool isDot; 5802 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 5803 return SDValue(); // Don't custom lower most intrinsics. 5804 5805 // If this is a non-dot comparison, make the VCMP node and we are done. 5806 if (!isDot) { 5807 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 5808 Op.getOperand(1), Op.getOperand(2), 5809 DAG.getConstant(CompareOpc, MVT::i32)); 5810 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 5811 } 5812 5813 // Create the PPCISD altivec 'dot' comparison node. 5814 SDValue Ops[] = { 5815 Op.getOperand(2), // LHS 5816 Op.getOperand(3), // RHS 5817 DAG.getConstant(CompareOpc, MVT::i32) 5818 }; 5819 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 5820 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 5821 5822 // Now that we have the comparison, emit a copy from the CR to a GPR. 5823 // This is flagged to the above dot comparison. 5824 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 5825 DAG.getRegister(PPC::CR6, MVT::i32), 5826 CompNode.getValue(1)); 5827 5828 // Unpack the result based on how the target uses it. 5829 unsigned BitNo; // Bit # of CR6. 5830 bool InvertBit; // Invert result? 5831 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 5832 default: // Can't happen, don't crash on invalid number though. 5833 case 0: // Return the value of the EQ bit of CR6. 5834 BitNo = 0; InvertBit = false; 5835 break; 5836 case 1: // Return the inverted value of the EQ bit of CR6. 5837 BitNo = 0; InvertBit = true; 5838 break; 5839 case 2: // Return the value of the LT bit of CR6. 5840 BitNo = 2; InvertBit = false; 5841 break; 5842 case 3: // Return the inverted value of the LT bit of CR6. 5843 BitNo = 2; InvertBit = true; 5844 break; 5845 } 5846 5847 // Shift the bit into the low position. 5848 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 5849 DAG.getConstant(8-(3-BitNo), MVT::i32)); 5850 // Isolate the bit. 
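// (CR6 occupies CR bits 24-27: LT is CR bit 24 and EQ is CR bit 26, which sit
// at bit positions 7 and 5 of the MFOCRF result when counting from the
// least-significant bit. The shift amount 8-(3-BitNo) above is therefore 5 for
// the EQ bit (BitNo == 0) and 7 for the LT bit (BitNo == 2); the AND below
// then masks off everything but that bit.)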
5851 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 5852 DAG.getConstant(1, MVT::i32)); 5853 5854 // If we are supposed to, toggle the bit. 5855 if (InvertBit) 5856 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 5857 DAG.getConstant(1, MVT::i32)); 5858 return Flags; 5859 } 5860 5861 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 5862 SelectionDAG &DAG) const { 5863 SDLoc dl(Op); 5864 // Create a stack slot that is 16-byte aligned. 5865 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 5866 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 5867 EVT PtrVT = getPointerTy(); 5868 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5869 5870 // Store the input value into Value#0 of the stack slot. 5871 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 5872 Op.getOperand(0), FIdx, MachinePointerInfo(), 5873 false, false, 0); 5874 // Load it out. 5875 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 5876 false, false, false, 0); 5877 } 5878 5879 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 5880 SDLoc dl(Op); 5881 if (Op.getValueType() == MVT::v4i32) { 5882 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5883 5884 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 5885 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 5886 5887 SDValue RHSSwap = // = vrlw RHS, 16 5888 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 5889 5890 // Shrinkify inputs to v8i16. 5891 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 5892 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 5893 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 5894 5895 // Low parts multiplied together, generating 32-bit results (we ignore the 5896 // top parts). 5897 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 5898 LHS, RHS, DAG, dl, MVT::v4i32); 5899 5900 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 5901 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 5902 // Shift the high parts up 16 bits. 5903 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 5904 Neg16, DAG, dl); 5905 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 5906 } else if (Op.getValueType() == MVT::v8i16) { 5907 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5908 5909 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 5910 5911 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 5912 LHS, RHS, Zero, DAG, dl); 5913 } else if (Op.getValueType() == MVT::v16i8) { 5914 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5915 5916 // Multiply the even 8-bit parts, producing 16-bit sums. 5917 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 5918 LHS, RHS, DAG, dl, MVT::v8i16); 5919 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 5920 5921 // Multiply the odd 8-bit parts, producing 16-bit sums. 5922 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 5923 LHS, RHS, DAG, dl, MVT::v8i16); 5924 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 5925 5926 // Merge the results together. 5927 int Ops[16]; 5928 for (unsigned i = 0; i != 8; ++i) { 5929 Ops[i*2 ] = 2*i+1; 5930 Ops[i*2+1] = 2*i+1+16; 5931 } 5932 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 5933 } else { 5934 llvm_unreachable("Unknown mul to lower!"); 5935 } 5936 } 5937 5938 /// LowerOperation - Provide custom lowering hooks for some operations. 
5939 /// 5940 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 5941 switch (Op.getOpcode()) { 5942 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 5943 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5944 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 5945 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5946 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5947 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5948 case ISD::SETCC: return LowerSETCC(Op, DAG); 5949 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 5950 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 5951 case ISD::VASTART: 5952 return LowerVASTART(Op, DAG, PPCSubTarget); 5953 5954 case ISD::VAARG: 5955 return LowerVAARG(Op, DAG, PPCSubTarget); 5956 5957 case ISD::VACOPY: 5958 return LowerVACOPY(Op, DAG, PPCSubTarget); 5959 5960 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); 5961 case ISD::DYNAMIC_STACKALLOC: 5962 return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); 5963 5964 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 5965 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 5966 5967 case ISD::LOAD: return LowerLOAD(Op, DAG); 5968 case ISD::STORE: return LowerSTORE(Op, DAG); 5969 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); 5970 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5971 case ISD::FP_TO_UINT: 5972 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 5973 SDLoc(Op)); 5974 case ISD::UINT_TO_FP: 5975 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 5976 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5977 5978 // Lower 64-bit shifts. 5979 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 5980 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 5981 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 5982 5983 // Vector-related lowering. 5984 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 5985 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5986 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5987 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 5988 case ISD::MUL: return LowerMUL(Op, DAG); 5989 5990 // For counter-based loop handling. 5991 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 5992 5993 // Frame & Return address. 
5994 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5995 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5996 } 5997 } 5998 5999 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 6000 SmallVectorImpl<SDValue>&Results, 6001 SelectionDAG &DAG) const { 6002 const TargetMachine &TM = getTargetMachine(); 6003 SDLoc dl(N); 6004 switch (N->getOpcode()) { 6005 default: 6006 llvm_unreachable("Do not know how to custom type legalize this operation!"); 6007 case ISD::INTRINSIC_W_CHAIN: { 6008 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 6009 Intrinsic::ppc_is_decremented_ctr_nonzero) 6010 break; 6011 6012 assert(N->getValueType(0) == MVT::i1 && 6013 "Unexpected result type for CTR decrement intrinsic"); 6014 EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); 6015 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 6016 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 6017 N->getOperand(1)); 6018 6019 Results.push_back(NewInt); 6020 Results.push_back(NewInt.getValue(1)); 6021 break; 6022 } 6023 case ISD::VAARG: { 6024 if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() 6025 || TM.getSubtarget<PPCSubtarget>().isPPC64()) 6026 return; 6027 6028 EVT VT = N->getValueType(0); 6029 6030 if (VT == MVT::i64) { 6031 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); 6032 6033 Results.push_back(NewNode); 6034 Results.push_back(NewNode.getValue(1)); 6035 } 6036 return; 6037 } 6038 case ISD::FP_ROUND_INREG: { 6039 assert(N->getValueType(0) == MVT::ppcf128); 6040 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 6041 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 6042 MVT::f64, N->getOperand(0), 6043 DAG.getIntPtrConstant(0)); 6044 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 6045 MVT::f64, N->getOperand(0), 6046 DAG.getIntPtrConstant(1)); 6047 6048 // Add the two halves of the long double in round-to-zero mode. 6049 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 6050 6051 // We know the low half is about to be thrown away, so just use something 6052 // convenient. 6053 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 6054 FPreg, FPreg)); 6055 return; 6056 } 6057 case ISD::FP_TO_SINT: 6058 // LowerFP_TO_INT() can only handle f32 and f64. 6059 if (N->getOperand(0).getValueType() == MVT::ppcf128) 6060 return; 6061 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 6062 return; 6063 } 6064 } 6065 6066 6067 //===----------------------------------------------------------------------===// 6068 // Other Lowering Code 6069 //===----------------------------------------------------------------------===// 6070 6071 MachineBasicBlock * 6072 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 6073 bool is64bit, unsigned BinOpcode) const { 6074 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
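// When BinOpcode is 0 no arithmetic instruction is emitted and the loop body
// simply stores the incoming value (TmpReg is set to incr below), which is
// exactly the swap behaviour.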
6075 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6076 6077 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6078 MachineFunction *F = BB->getParent(); 6079 MachineFunction::iterator It = BB; 6080 ++It; 6081 6082 unsigned dest = MI->getOperand(0).getReg(); 6083 unsigned ptrA = MI->getOperand(1).getReg(); 6084 unsigned ptrB = MI->getOperand(2).getReg(); 6085 unsigned incr = MI->getOperand(3).getReg(); 6086 DebugLoc dl = MI->getDebugLoc(); 6087 6088 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 6089 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6090 F->insert(It, loopMBB); 6091 F->insert(It, exitMBB); 6092 exitMBB->splice(exitMBB->begin(), BB, 6093 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6094 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6095 6096 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6097 unsigned TmpReg = (!BinOpcode) ? incr : 6098 RegInfo.createVirtualRegister( 6099 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 6100 (const TargetRegisterClass *) &PPC::GPRCRegClass); 6101 6102 // thisMBB: 6103 // ... 6104 // fallthrough --> loopMBB 6105 BB->addSuccessor(loopMBB); 6106 6107 // loopMBB: 6108 // l[wd]arx dest, ptr 6109 // add r0, dest, incr 6110 // st[wd]cx. r0, ptr 6111 // bne- loopMBB 6112 // fallthrough --> exitMBB 6113 BB = loopMBB; 6114 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 6115 .addReg(ptrA).addReg(ptrB); 6116 if (BinOpcode) 6117 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 6118 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 6119 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 6120 BuildMI(BB, dl, TII->get(PPC::BCC)) 6121 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 6122 BB->addSuccessor(loopMBB); 6123 BB->addSuccessor(exitMBB); 6124 6125 // exitMBB: 6126 // ... 6127 BB = exitMBB; 6128 return BB; 6129 } 6130 6131 MachineBasicBlock * 6132 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 6133 MachineBasicBlock *BB, 6134 bool is8bit, // operation 6135 unsigned BinOpcode) const { 6136 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 6137 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6138 // In 64 bit mode we have to use 64 bits for addresses, even though the 6139 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 6140 // registers without caring whether they're 32 or 64, but here we're 6141 // doing actual arithmetic on the addresses. 6142 bool is64bit = PPCSubTarget.isPPC64(); 6143 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 6144 6145 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6146 MachineFunction *F = BB->getParent(); 6147 MachineFunction::iterator It = BB; 6148 ++It; 6149 6150 unsigned dest = MI->getOperand(0).getReg(); 6151 unsigned ptrA = MI->getOperand(1).getReg(); 6152 unsigned ptrB = MI->getOperand(2).getReg(); 6153 unsigned incr = MI->getOperand(3).getReg(); 6154 DebugLoc dl = MI->getDebugLoc(); 6155 6156 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 6157 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6158 F->insert(It, loopMBB); 6159 F->insert(It, exitMBB); 6160 exitMBB->splice(exitMBB->begin(), BB, 6161 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6162 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6163 6164 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6165 const TargetRegisterClass *RC = 6166 is64bit ? 
(const TargetRegisterClass *) &PPC::G8RCRegClass : 6167 (const TargetRegisterClass *) &PPC::GPRCRegClass; 6168 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 6169 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 6170 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 6171 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 6172 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 6173 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 6174 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 6175 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 6176 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 6177 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 6178 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 6179 unsigned Ptr1Reg; 6180 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 6181 6182 // thisMBB: 6183 // ... 6184 // fallthrough --> loopMBB 6185 BB->addSuccessor(loopMBB); 6186 6187 // The 4-byte load must be aligned, while a char or short may be 6188 // anywhere in the word. Hence all this nasty bookkeeping code. 6189 // add ptr1, ptrA, ptrB [copy if ptrA==0] 6190 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 6191 // xori shift, shift1, 24 [16] 6192 // rlwinm ptr, ptr1, 0, 0, 29 6193 // slw incr2, incr, shift 6194 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 6195 // slw mask, mask2, shift 6196 // loopMBB: 6197 // lwarx tmpDest, ptr 6198 // add tmp, tmpDest, incr2 6199 // andc tmp2, tmpDest, mask 6200 // and tmp3, tmp, mask 6201 // or tmp4, tmp3, tmp2 6202 // stwcx. tmp4, ptr 6203 // bne- loopMBB 6204 // fallthrough --> exitMBB 6205 // srw dest, tmpDest, shift 6206 if (ptrA != ZeroReg) { 6207 Ptr1Reg = RegInfo.createVirtualRegister(RC); 6208 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 6209 .addReg(ptrA).addReg(ptrB); 6210 } else { 6211 Ptr1Reg = ptrB; 6212 } 6213 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 6214 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 6215 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 6216 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 6217 if (is64bit) 6218 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 6219 .addReg(Ptr1Reg).addImm(0).addImm(61); 6220 else 6221 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 6222 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 6223 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 6224 .addReg(incr).addReg(ShiftReg); 6225 if (is8bit) 6226 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 6227 else { 6228 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 6229 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 6230 } 6231 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 6232 .addReg(Mask2Reg).addReg(ShiftReg); 6233 6234 BB = loopMBB; 6235 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 6236 .addReg(ZeroReg).addReg(PtrReg); 6237 if (BinOpcode) 6238 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 6239 .addReg(Incr2Reg).addReg(TmpDestReg); 6240 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 6241 .addReg(TmpDestReg).addReg(MaskReg); 6242 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 6243 .addReg(TmpReg).addReg(MaskReg); 6244 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::OR8 : PPC::OR), Tmp4Reg) 6245 .addReg(Tmp3Reg).addReg(Tmp2Reg); 6246 BuildMI(BB, dl, TII->get(PPC::STWCX)) 6247 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 6248 BuildMI(BB, dl, TII->get(PPC::BCC)) 6249 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 6250 BB->addSuccessor(loopMBB); 6251 BB->addSuccessor(exitMBB); 6252 6253 // exitMBB: 6254 // ... 6255 BB = exitMBB; 6256 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 6257 .addReg(ShiftReg); 6258 return BB; 6259 } 6260 6261 llvm::MachineBasicBlock* 6262 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, 6263 MachineBasicBlock *MBB) const { 6264 DebugLoc DL = MI->getDebugLoc(); 6265 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6266 6267 MachineFunction *MF = MBB->getParent(); 6268 MachineRegisterInfo &MRI = MF->getRegInfo(); 6269 6270 const BasicBlock *BB = MBB->getBasicBlock(); 6271 MachineFunction::iterator I = MBB; 6272 ++I; 6273 6274 // Memory Reference 6275 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 6276 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 6277 6278 unsigned DstReg = MI->getOperand(0).getReg(); 6279 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 6280 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 6281 unsigned mainDstReg = MRI.createVirtualRegister(RC); 6282 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 6283 6284 MVT PVT = getPointerTy(); 6285 assert((PVT == MVT::i64 || PVT == MVT::i32) && 6286 "Invalid Pointer Size!"); 6287 // For v = setjmp(buf), we generate 6288 // 6289 // thisMBB: 6290 // SjLjSetup mainMBB 6291 // bl mainMBB 6292 // v_restore = 1 6293 // b sinkMBB 6294 // 6295 // mainMBB: 6296 // buf[LabelOffset] = LR 6297 // v_main = 0 6298 // 6299 // sinkMBB: 6300 // v = phi(main, restore) 6301 // 6302 6303 MachineBasicBlock *thisMBB = MBB; 6304 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 6305 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 6306 MF->insert(I, mainMBB); 6307 MF->insert(I, sinkMBB); 6308 6309 MachineInstrBuilder MIB; 6310 6311 // Transfer the remainder of BB and its successor edges to sinkMBB. 6312 sinkMBB->splice(sinkMBB->begin(), MBB, 6313 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 6314 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 6315 6316 // Note that the structure of the jmp_buf used here is not compatible 6317 // with that used by libc, and is not designed to be. Specifically, it 6318 // stores only those 'reserved' registers that LLVM does not otherwise 6319 // understand how to spill. Also, by convention, by the time this 6320 // intrinsic is called, Clang has already stored the frame address in the 6321 // first slot of the buffer and stack address in the third. Following the 6322 // X86 target code, we'll store the jump address in the second slot. We also 6323 // need to save the TOC pointer (R2) to handle jumps between shared 6324 // libraries, and that will be stored in the fourth slot. The thread 6325 // identifier (R13) is not affected. 6326 6327 // thisMBB: 6328 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 6329 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 6330 const int64_t BPOffset = 4 * PVT.getStoreSize(); 6331 6332 // Prepare IP either in reg. 
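// (LabelReg, allocated below, will receive the return address via MFLR in
// mainMBB and is then stored into the LabelOffset slot of the buffer.)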
6333 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 6334 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 6335 unsigned BufReg = MI->getOperand(1).getReg(); 6336 6337 if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { 6338 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 6339 .addReg(PPC::X2) 6340 .addImm(TOCOffset) 6341 .addReg(BufReg); 6342 MIB.setMemRefs(MMOBegin, MMOEnd); 6343 } 6344 6345 // Naked functions never have a base pointer, and so we use r1. For all 6346 // other functions, this decision must be delayed until during PEI. 6347 unsigned BaseReg; 6348 if (MF->getFunction()->getAttributes().hasAttribute( 6349 AttributeSet::FunctionIndex, Attribute::Naked)) 6350 BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1; 6351 else 6352 BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP; 6353 6354 MIB = BuildMI(*thisMBB, MI, DL, 6355 TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW)) 6356 .addReg(BaseReg) 6357 .addImm(BPOffset) 6358 .addReg(BufReg); 6359 MIB.setMemRefs(MMOBegin, MMOEnd); 6360 6361 // Setup 6362 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 6363 const PPCRegisterInfo *TRI = 6364 static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo()); 6365 MIB.addRegMask(TRI->getNoPreservedMask()); 6366 6367 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 6368 6369 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 6370 .addMBB(mainMBB); 6371 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 6372 6373 thisMBB->addSuccessor(mainMBB, /* weight */ 0); 6374 thisMBB->addSuccessor(sinkMBB, /* weight */ 1); 6375 6376 // mainMBB: 6377 // mainDstReg = 0 6378 MIB = BuildMI(mainMBB, DL, 6379 TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); 6380 6381 // Store IP 6382 if (PPCSubTarget.isPPC64()) { 6383 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 6384 .addReg(LabelReg) 6385 .addImm(LabelOffset) 6386 .addReg(BufReg); 6387 } else { 6388 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 6389 .addReg(LabelReg) 6390 .addImm(LabelOffset) 6391 .addReg(BufReg); 6392 } 6393 6394 MIB.setMemRefs(MMOBegin, MMOEnd); 6395 6396 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 6397 mainMBB->addSuccessor(sinkMBB); 6398 6399 // sinkMBB: 6400 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 6401 TII->get(PPC::PHI), DstReg) 6402 .addReg(mainDstReg).addMBB(mainMBB) 6403 .addReg(restoreDstReg).addMBB(thisMBB); 6404 6405 MI->eraseFromParent(); 6406 return sinkMBB; 6407 } 6408 6409 MachineBasicBlock * 6410 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, 6411 MachineBasicBlock *MBB) const { 6412 DebugLoc DL = MI->getDebugLoc(); 6413 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6414 6415 MachineFunction *MF = MBB->getParent(); 6416 MachineRegisterInfo &MRI = MF->getRegInfo(); 6417 6418 // Memory Reference 6419 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 6420 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 6421 6422 MVT PVT = getPointerTy(); 6423 assert((PVT == MVT::i64 || PVT == MVT::i32) && 6424 "Invalid Pointer Size!"); 6425 6426 const TargetRegisterClass *RC = 6427 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 6428 unsigned Tmp = MRI.createVirtualRegister(RC); 6429 // Since FP is only updated here but NOT referenced, it's treated as GPR. 6430 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 6431 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 6432 unsigned BP = (PVT == MVT::i64) ? 
PPC::X30 : PPC::R30; 6433 6434 MachineInstrBuilder MIB; 6435 6436 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 6437 const int64_t SPOffset = 2 * PVT.getStoreSize(); 6438 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 6439 const int64_t BPOffset = 4 * PVT.getStoreSize(); 6440 6441 unsigned BufReg = MI->getOperand(0).getReg(); 6442 6443 // Reload FP (the jumped-to function may not have had a 6444 // frame pointer, and if so, then its r31 will be restored 6445 // as necessary). 6446 if (PVT == MVT::i64) { 6447 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 6448 .addImm(0) 6449 .addReg(BufReg); 6450 } else { 6451 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 6452 .addImm(0) 6453 .addReg(BufReg); 6454 } 6455 MIB.setMemRefs(MMOBegin, MMOEnd); 6456 6457 // Reload IP 6458 if (PVT == MVT::i64) { 6459 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 6460 .addImm(LabelOffset) 6461 .addReg(BufReg); 6462 } else { 6463 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 6464 .addImm(LabelOffset) 6465 .addReg(BufReg); 6466 } 6467 MIB.setMemRefs(MMOBegin, MMOEnd); 6468 6469 // Reload SP 6470 if (PVT == MVT::i64) { 6471 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 6472 .addImm(SPOffset) 6473 .addReg(BufReg); 6474 } else { 6475 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 6476 .addImm(SPOffset) 6477 .addReg(BufReg); 6478 } 6479 MIB.setMemRefs(MMOBegin, MMOEnd); 6480 6481 // Reload BP 6482 if (PVT == MVT::i64) { 6483 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 6484 .addImm(BPOffset) 6485 .addReg(BufReg); 6486 } else { 6487 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 6488 .addImm(BPOffset) 6489 .addReg(BufReg); 6490 } 6491 MIB.setMemRefs(MMOBegin, MMOEnd); 6492 6493 // Reload TOC 6494 if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { 6495 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 6496 .addImm(TOCOffset) 6497 .addReg(BufReg); 6498 6499 MIB.setMemRefs(MMOBegin, MMOEnd); 6500 } 6501 6502 // Jump 6503 BuildMI(*MBB, MI, DL, 6504 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 6505 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 6506 6507 MI->eraseFromParent(); 6508 return MBB; 6509 } 6510 6511 MachineBasicBlock * 6512 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 6513 MachineBasicBlock *BB) const { 6514 if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || 6515 MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { 6516 return emitEHSjLjSetJmp(MI, BB); 6517 } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || 6518 MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { 6519 return emitEHSjLjLongJmp(MI, BB); 6520 } 6521 6522 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6523 6524 // To "insert" these instructions we actually have to insert their 6525 // control-flow patterns. 
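// For the integer SELECT/SELECT_CC pseudos on subtargets with ISEL we can let
// TII->insertSelect emit an isel-based sequence; all other selects are
// expanded into the usual compare-and-branch diamond
// (thisMBB -> copy0MBB -> sinkMBB) below.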
6526 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6527 MachineFunction::iterator It = BB; 6528 ++It; 6529 6530 MachineFunction *F = BB->getParent(); 6531 6532 if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || 6533 MI->getOpcode() == PPC::SELECT_CC_I8 || 6534 MI->getOpcode() == PPC::SELECT_I4 || 6535 MI->getOpcode() == PPC::SELECT_I8)) { 6536 SmallVector<MachineOperand, 2> Cond; 6537 if (MI->getOpcode() == PPC::SELECT_CC_I4 || 6538 MI->getOpcode() == PPC::SELECT_CC_I8) 6539 Cond.push_back(MI->getOperand(4)); 6540 else 6541 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 6542 Cond.push_back(MI->getOperand(1)); 6543 6544 DebugLoc dl = MI->getDebugLoc(); 6545 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6546 TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), 6547 Cond, MI->getOperand(2).getReg(), 6548 MI->getOperand(3).getReg()); 6549 } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || 6550 MI->getOpcode() == PPC::SELECT_CC_I8 || 6551 MI->getOpcode() == PPC::SELECT_CC_F4 || 6552 MI->getOpcode() == PPC::SELECT_CC_F8 || 6553 MI->getOpcode() == PPC::SELECT_CC_VRRC || 6554 MI->getOpcode() == PPC::SELECT_I4 || 6555 MI->getOpcode() == PPC::SELECT_I8 || 6556 MI->getOpcode() == PPC::SELECT_F4 || 6557 MI->getOpcode() == PPC::SELECT_F8 || 6558 MI->getOpcode() == PPC::SELECT_VRRC) { 6559 // The incoming instruction knows the destination vreg to set, the 6560 // condition code register to branch on, the true/false values to 6561 // select between, and a branch opcode to use. 6562 6563 // thisMBB: 6564 // ... 6565 // TrueVal = ... 6566 // cmpTY ccX, r1, r2 6567 // bCC copy1MBB 6568 // fallthrough --> copy0MBB 6569 MachineBasicBlock *thisMBB = BB; 6570 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 6571 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 6572 DebugLoc dl = MI->getDebugLoc(); 6573 F->insert(It, copy0MBB); 6574 F->insert(It, sinkMBB); 6575 6576 // Transfer the remainder of BB and its successor edges to sinkMBB. 6577 sinkMBB->splice(sinkMBB->begin(), BB, 6578 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6579 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 6580 6581 // Next, add the true and fallthrough blocks as its successors. 6582 BB->addSuccessor(copy0MBB); 6583 BB->addSuccessor(sinkMBB); 6584 6585 if (MI->getOpcode() == PPC::SELECT_I4 || 6586 MI->getOpcode() == PPC::SELECT_I8 || 6587 MI->getOpcode() == PPC::SELECT_F4 || 6588 MI->getOpcode() == PPC::SELECT_F8 || 6589 MI->getOpcode() == PPC::SELECT_VRRC) { 6590 BuildMI(BB, dl, TII->get(PPC::BC)) 6591 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 6592 } else { 6593 unsigned SelectPred = MI->getOperand(4).getImm(); 6594 BuildMI(BB, dl, TII->get(PPC::BCC)) 6595 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 6596 } 6597 6598 // copy0MBB: 6599 // %FalseValue = ... 6600 // # fallthrough to sinkMBB 6601 BB = copy0MBB; 6602 6603 // Update machine-CFG edges 6604 BB->addSuccessor(sinkMBB); 6605 6606 // sinkMBB: 6607 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 6608 // ... 
6609 BB = sinkMBB; 6610 BuildMI(*BB, BB->begin(), dl, 6611 TII->get(PPC::PHI), MI->getOperand(0).getReg()) 6612 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 6613 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 6614 } 6615 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 6616 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 6617 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 6618 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 6619 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 6620 BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); 6621 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 6622 BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); 6623 6624 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 6625 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 6626 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 6627 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 6628 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 6629 BB = EmitAtomicBinary(MI, BB, false, PPC::AND); 6630 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 6631 BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); 6632 6633 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 6634 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 6635 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 6636 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 6637 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 6638 BB = EmitAtomicBinary(MI, BB, false, PPC::OR); 6639 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 6640 BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); 6641 6642 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 6643 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 6644 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 6645 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 6646 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 6647 BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); 6648 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 6649 BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); 6650 6651 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 6652 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC); 6653 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 6654 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC); 6655 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 6656 BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC); 6657 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 6658 BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8); 6659 6660 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 6661 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 6662 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 6663 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 6664 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 6665 BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); 6666 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 6667 BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); 6668 6669 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 6670 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 6671 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 6672 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 6673 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 6674 BB = EmitAtomicBinary(MI, BB, false, 0); 6675 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 6676 BB = EmitAtomicBinary(MI, BB, true, 0); 6677 6678 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 6679 MI->getOpcode() == 
PPC::ATOMIC_CMP_SWAP_I64) { 6680 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 6681 6682 unsigned dest = MI->getOperand(0).getReg(); 6683 unsigned ptrA = MI->getOperand(1).getReg(); 6684 unsigned ptrB = MI->getOperand(2).getReg(); 6685 unsigned oldval = MI->getOperand(3).getReg(); 6686 unsigned newval = MI->getOperand(4).getReg(); 6687 DebugLoc dl = MI->getDebugLoc(); 6688 6689 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 6690 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 6691 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 6692 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6693 F->insert(It, loop1MBB); 6694 F->insert(It, loop2MBB); 6695 F->insert(It, midMBB); 6696 F->insert(It, exitMBB); 6697 exitMBB->splice(exitMBB->begin(), BB, 6698 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6699 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6700 6701 // thisMBB: 6702 // ... 6703 // fallthrough --> loopMBB 6704 BB->addSuccessor(loop1MBB); 6705 6706 // loop1MBB: 6707 // l[wd]arx dest, ptr 6708 // cmp[wd] dest, oldval 6709 // bne- midMBB 6710 // loop2MBB: 6711 // st[wd]cx. newval, ptr 6712 // bne- loopMBB 6713 // b exitBB 6714 // midMBB: 6715 // st[wd]cx. dest, ptr 6716 // exitBB: 6717 BB = loop1MBB; 6718 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 6719 .addReg(ptrA).addReg(ptrB); 6720 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) 6721 .addReg(oldval).addReg(dest); 6722 BuildMI(BB, dl, TII->get(PPC::BCC)) 6723 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 6724 BB->addSuccessor(loop2MBB); 6725 BB->addSuccessor(midMBB); 6726 6727 BB = loop2MBB; 6728 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 6729 .addReg(newval).addReg(ptrA).addReg(ptrB); 6730 BuildMI(BB, dl, TII->get(PPC::BCC)) 6731 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 6732 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 6733 BB->addSuccessor(loop1MBB); 6734 BB->addSuccessor(exitMBB); 6735 6736 BB = midMBB; 6737 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 6738 .addReg(dest).addReg(ptrA).addReg(ptrB); 6739 BB->addSuccessor(exitMBB); 6740 6741 // exitMBB: 6742 // ... 6743 BB = exitMBB; 6744 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 6745 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 6746 // We must use 64-bit registers for addresses when targeting 64-bit, 6747 // since we're actually doing arithmetic on them. Other registers 6748 // can be 32-bit. 
6749 bool is64bit = PPCSubTarget.isPPC64(); 6750 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 6751 6752 unsigned dest = MI->getOperand(0).getReg(); 6753 unsigned ptrA = MI->getOperand(1).getReg(); 6754 unsigned ptrB = MI->getOperand(2).getReg(); 6755 unsigned oldval = MI->getOperand(3).getReg(); 6756 unsigned newval = MI->getOperand(4).getReg(); 6757 DebugLoc dl = MI->getDebugLoc(); 6758 6759 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 6760 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 6761 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 6762 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6763 F->insert(It, loop1MBB); 6764 F->insert(It, loop2MBB); 6765 F->insert(It, midMBB); 6766 F->insert(It, exitMBB); 6767 exitMBB->splice(exitMBB->begin(), BB, 6768 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6769 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6770 6771 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6772 const TargetRegisterClass *RC = 6773 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 6774 (const TargetRegisterClass *) &PPC::GPRCRegClass; 6775 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 6776 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 6777 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 6778 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 6779 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 6780 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 6781 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 6782 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 6783 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 6784 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 6785 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 6786 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 6787 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 6788 unsigned Ptr1Reg; 6789 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 6790 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 6791 // thisMBB: 6792 // ... 6793 // fallthrough --> loopMBB 6794 BB->addSuccessor(loop1MBB); 6795 6796 // The 4-byte load must be aligned, while a char or short may be 6797 // anywhere in the word. Hence all this nasty bookkeeping code. 6798 // add ptr1, ptrA, ptrB [copy if ptrA==0] 6799 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 6800 // xori shift, shift1, 24 [16] 6801 // rlwinm ptr, ptr1, 0, 0, 29 6802 // slw newval2, newval, shift 6803 // slw oldval2, oldval,shift 6804 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 6805 // slw mask, mask2, shift 6806 // and newval3, newval2, mask 6807 // and oldval3, oldval2, mask 6808 // loop1MBB: 6809 // lwarx tmpDest, ptr 6810 // and tmp, tmpDest, mask 6811 // cmpw tmp, oldval3 6812 // bne- midMBB 6813 // loop2MBB: 6814 // andc tmp2, tmpDest, mask 6815 // or tmp4, tmp2, newval3 6816 // stwcx. tmp4, ptr 6817 // bne- loop1MBB 6818 // b exitBB 6819 // midMBB: 6820 // stwcx. tmpDest, ptr 6821 // exitBB: 6822 // srw dest, tmpDest, shift 6823 if (ptrA != ZeroReg) { 6824 Ptr1Reg = RegInfo.createVirtualRegister(RC); 6825 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 6826 .addReg(ptrA).addReg(ptrB); 6827 } else { 6828 Ptr1Reg = ptrB; 6829 } 6830 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 6831 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 6832 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::XORI8 : PPC::XORI), ShiftReg) 6833 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 6834 if (is64bit) 6835 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 6836 .addReg(Ptr1Reg).addImm(0).addImm(61); 6837 else 6838 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 6839 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 6840 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 6841 .addReg(newval).addReg(ShiftReg); 6842 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 6843 .addReg(oldval).addReg(ShiftReg); 6844 if (is8bit) 6845 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 6846 else { 6847 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 6848 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 6849 .addReg(Mask3Reg).addImm(65535); 6850 } 6851 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 6852 .addReg(Mask2Reg).addReg(ShiftReg); 6853 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 6854 .addReg(NewVal2Reg).addReg(MaskReg); 6855 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 6856 .addReg(OldVal2Reg).addReg(MaskReg); 6857 6858 BB = loop1MBB; 6859 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 6860 .addReg(ZeroReg).addReg(PtrReg); 6861 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 6862 .addReg(TmpDestReg).addReg(MaskReg); 6863 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 6864 .addReg(TmpReg).addReg(OldVal3Reg); 6865 BuildMI(BB, dl, TII->get(PPC::BCC)) 6866 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 6867 BB->addSuccessor(loop2MBB); 6868 BB->addSuccessor(midMBB); 6869 6870 BB = loop2MBB; 6871 BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) 6872 .addReg(TmpDestReg).addReg(MaskReg); 6873 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 6874 .addReg(Tmp2Reg).addReg(NewVal3Reg); 6875 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 6876 .addReg(ZeroReg).addReg(PtrReg); 6877 BuildMI(BB, dl, TII->get(PPC::BCC)) 6878 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 6879 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 6880 BB->addSuccessor(loop1MBB); 6881 BB->addSuccessor(exitMBB); 6882 6883 BB = midMBB; 6884 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 6885 .addReg(ZeroReg).addReg(PtrReg); 6886 BB->addSuccessor(exitMBB); 6887 6888 // exitMBB: 6889 // ... 6890 BB = exitMBB; 6891 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 6892 .addReg(ShiftReg); 6893 } else if (MI->getOpcode() == PPC::FADDrtz) { 6894 // This pseudo performs an FADD with rounding mode temporarily forced 6895 // to round-to-zero. We emit this via custom inserter since the FPSCR 6896 // is not modeled at the SelectionDAG level. 6897 unsigned Dest = MI->getOperand(0).getReg(); 6898 unsigned Src1 = MI->getOperand(1).getReg(); 6899 unsigned Src2 = MI->getOperand(2).getReg(); 6900 DebugLoc dl = MI->getDebugLoc(); 6901 6902 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6903 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 6904 6905 // Save FPSCR value. 6906 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 6907 6908 // Set rounding mode to round-to-zero. 6909 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 6910 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 6911 6912 // Perform addition. 6913 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 6914 6915 // Restore FPSCR value. 
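// (FPSCR bits 30-31 form the RN rounding-mode field; the MTFSB1 31 / MTFSB0 30
// pair above selects RN=0b01, round toward zero, and the MTFSF below writes
// the saved value back.)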
6916 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); 6917 } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 6918 MI->getOpcode() == PPC::ANDIo_1_GT_BIT || 6919 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 6920 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { 6921 unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 6922 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? 6923 PPC::ANDIo8 : PPC::ANDIo; 6924 bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 6925 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); 6926 6927 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6928 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 6929 &PPC::GPRCRegClass : 6930 &PPC::G8RCRegClass); 6931 6932 DebugLoc dl = MI->getDebugLoc(); 6933 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 6934 .addReg(MI->getOperand(1).getReg()).addImm(1); 6935 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 6936 MI->getOperand(0).getReg()) 6937 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 6938 } else { 6939 llvm_unreachable("Unexpected instr type to insert"); 6940 } 6941 6942 MI->eraseFromParent(); // The pseudo instruction is gone now. 6943 return BB; 6944 } 6945 6946 //===----------------------------------------------------------------------===// 6947 // Target Optimization Hooks 6948 //===----------------------------------------------------------------------===// 6949 6950 SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, 6951 DAGCombinerInfo &DCI) const { 6952 if (DCI.isAfterLegalizeVectorOps()) 6953 return SDValue(); 6954 6955 EVT VT = Op.getValueType(); 6956 6957 if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) || 6958 (VT == MVT::f64 && PPCSubTarget.hasFRE()) || 6959 (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) || 6960 (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) { 6961 6962 // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 6963 // For the reciprocal, we need to find the zero of the function: 6964 // F(X) = A X - 1 [which has a zero at X = 1/A] 6965 // => 6966 // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form 6967 // does not require additional intermediate precision] 6968 6969 // Convergence is quadratic, so we essentially double the number of digits 6970 // correct after every iteration. The minimum architected relative 6971 // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has 6972 // 23 digits and double has 52 digits. 6973 int Iterations = PPCSubTarget.hasRecipPrec() ? 
1 : 3; 6974 if (VT.getScalarType() == MVT::f64) 6975 ++Iterations; 6976 6977 SelectionDAG &DAG = DCI.DAG; 6978 SDLoc dl(Op); 6979 6980 SDValue FPOne = 6981 DAG.getConstantFP(1.0, VT.getScalarType()); 6982 if (VT.isVector()) { 6983 assert(VT.getVectorNumElements() == 4 && 6984 "Unknown vector type"); 6985 FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, 6986 FPOne, FPOne, FPOne, FPOne); 6987 } 6988 6989 SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op); 6990 DCI.AddToWorklist(Est.getNode()); 6991 6992 // Newton iterations: Est = Est + Est (1 - Arg * Est) 6993 for (int i = 0; i < Iterations; ++i) { 6994 SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est); 6995 DCI.AddToWorklist(NewEst.getNode()); 6996 6997 NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst); 6998 DCI.AddToWorklist(NewEst.getNode()); 6999 7000 NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); 7001 DCI.AddToWorklist(NewEst.getNode()); 7002 7003 Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst); 7004 DCI.AddToWorklist(Est.getNode()); 7005 } 7006 7007 return Est; 7008 } 7009 7010 return SDValue(); 7011 } 7012 7013 SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, 7014 DAGCombinerInfo &DCI) const { 7015 if (DCI.isAfterLegalizeVectorOps()) 7016 return SDValue(); 7017 7018 EVT VT = Op.getValueType(); 7019 7020 if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) || 7021 (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) || 7022 (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) || 7023 (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) { 7024 7025 // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 7026 // For the reciprocal sqrt, we need to find the zero of the function: 7027 // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 7028 // => 7029 // X_{i+1} = X_i (1.5 - A X_i^2 / 2) 7030 // As a result, we precompute A/2 prior to the iteration loop. 7031 7032 // Convergence is quadratic, so we essentially double the number of digits 7033 // correct after every iteration. The minimum architected relative 7034 // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has 7035 // 23 digits and double has 52 digits. 7036 int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3; 7037 if (VT.getScalarType() == MVT::f64) 7038 ++Iterations; 7039 7040 SelectionDAG &DAG = DCI.DAG; 7041 SDLoc dl(Op); 7042 7043 SDValue FPThreeHalves = 7044 DAG.getConstantFP(1.5, VT.getScalarType()); 7045 if (VT.isVector()) { 7046 assert(VT.getVectorNumElements() == 4 && 7047 "Unknown vector type"); 7048 FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, 7049 FPThreeHalves, FPThreeHalves, 7050 FPThreeHalves, FPThreeHalves); 7051 } 7052 7053 SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op); 7054 DCI.AddToWorklist(Est.getNode()); 7055 7056 // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that 7057 // this entire sequence requires only one FP constant. 
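// Expanding the recurrence: with HalfArg = (1.5*A - A) = 0.5*A, the loop below
// computes Est * (1.5 - HalfArg * Est * Est), which is exactly the Newton step
// X_{i+1} = X_i * (1.5 - 0.5 * A * X_i^2) derived above.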
7058 SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op); 7059 DCI.AddToWorklist(HalfArg.getNode()); 7060 7061 HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op); 7062 DCI.AddToWorklist(HalfArg.getNode()); 7063 7064 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) 7065 for (int i = 0; i < Iterations; ++i) { 7066 SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est); 7067 DCI.AddToWorklist(NewEst.getNode()); 7068 7069 NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst); 7070 DCI.AddToWorklist(NewEst.getNode()); 7071 7072 NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst); 7073 DCI.AddToWorklist(NewEst.getNode()); 7074 7075 Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); 7076 DCI.AddToWorklist(Est.getNode()); 7077 } 7078 7079 return Est; 7080 } 7081 7082 return SDValue(); 7083 } 7084 7085 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 7086 // not enforce equality of the chain operands. 7087 static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, 7088 unsigned Bytes, int Dist, 7089 SelectionDAG &DAG) { 7090 EVT VT = LS->getMemoryVT(); 7091 if (VT.getSizeInBits() / 8 != Bytes) 7092 return false; 7093 7094 SDValue Loc = LS->getBasePtr(); 7095 SDValue BaseLoc = Base->getBasePtr(); 7096 if (Loc.getOpcode() == ISD::FrameIndex) { 7097 if (BaseLoc.getOpcode() != ISD::FrameIndex) 7098 return false; 7099 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 7100 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 7101 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 7102 int FS = MFI->getObjectSize(FI); 7103 int BFS = MFI->getObjectSize(BFI); 7104 if (FS != BFS || FS != (int)Bytes) return false; 7105 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 7106 } 7107 7108 // Handle X+C 7109 if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && 7110 cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) 7111 return true; 7112 7113 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7114 const GlobalValue *GV1 = NULL; 7115 const GlobalValue *GV2 = NULL; 7116 int64_t Offset1 = 0; 7117 int64_t Offset2 = 0; 7118 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 7119 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 7120 if (isGA1 && isGA2 && GV1 == GV2) 7121 return Offset1 == (Offset2 + Dist*Bytes); 7122 return false; 7123 } 7124 7125 // Return true if there is a nearby consecutive load to the one provided 7126 // (regardless of alignment). We search up and down the chain, looking through 7127 // token factors and other loads (but nothing else). As a result, a true 7128 // result indicates that it is safe to create a new consecutive load adjacent 7129 // to the load provided. 7130 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 7131 SDValue Chain = LD->getChain(); 7132 EVT VT = LD->getMemoryVT(); 7133 7134 SmallSet<SDNode *, 16> LoadRoots; 7135 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 7136 SmallSet<SDNode *, 16> Visited; 7137 7138 // First, search up the chain, branching to follow all token-factor operands. 7139 // If we find a consecutive load, then we're done, otherwise, record all 7140 // nodes just above the top-level loads and token factors.
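// (A hit from isConsecutiveLS with Dist == 1 means the candidate accesses the
// VT.getStoreSize()-byte slot immediately following LD's.)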
7141 while (!Queue.empty()) { 7142 SDNode *ChainNext = Queue.pop_back_val(); 7143 if (!Visited.insert(ChainNext)) 7144 continue; 7145 7146 if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) { 7147 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 7148 return true; 7149 7150 if (!Visited.count(ChainLD->getChain().getNode())) 7151 Queue.push_back(ChainLD->getChain().getNode()); 7152 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 7153 for (SDNode::op_iterator O = ChainNext->op_begin(), 7154 OE = ChainNext->op_end(); O != OE; ++O) 7155 if (!Visited.count(O->getNode())) 7156 Queue.push_back(O->getNode()); 7157 } else 7158 LoadRoots.insert(ChainNext); 7159 } 7160 7161 // Second, search down the chain, starting from the top-level nodes recorded 7162 // in the first phase. These top-level nodes are the nodes just above all 7163 // loads and token factors. Starting with their uses, recursively look through 7164 // all loads (just the chain uses) and token factors to find a consecutive 7165 // load. 7166 Visited.clear(); 7167 Queue.clear(); 7168 7169 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 7170 IE = LoadRoots.end(); I != IE; ++I) { 7171 Queue.push_back(*I); 7172 7173 while (!Queue.empty()) { 7174 SDNode *LoadRoot = Queue.pop_back_val(); 7175 if (!Visited.insert(LoadRoot)) 7176 continue; 7177 7178 if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot)) 7179 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 7180 return true; 7181 7182 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 7183 UE = LoadRoot->use_end(); UI != UE; ++UI) 7184 if (((isa<LoadSDNode>(*UI) && 7185 cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) || 7186 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 7187 Queue.push_back(*UI); 7188 } 7189 } 7190 7191 return false; 7192 } 7193 7194 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 7195 DAGCombinerInfo &DCI) const { 7196 SelectionDAG &DAG = DCI.DAG; 7197 SDLoc dl(N); 7198 7199 assert(PPCSubTarget.useCRBits() && 7200 "Expecting to be tracking CR bits"); 7201 // If we're tracking CR bits, we need to be careful that we don't have: 7202 // trunc(binary-ops(zext(x), zext(y))) 7203 // or 7204 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 7205 // such that we're unnecessarily moving things into GPRs when it would be 7206 // better to keep them in CR bits. 7207 7208 // Note that trunc here can be an actual i1 trunc, or can be the effective 7209 // truncation that comes from a setcc or select_cc. 7210 if (N->getOpcode() == ISD::TRUNCATE && 7211 N->getValueType(0) != MVT::i1) 7212 return SDValue(); 7213 7214 if (N->getOperand(0).getValueType() != MVT::i32 && 7215 N->getOperand(0).getValueType() != MVT::i64) 7216 return SDValue(); 7217 7218 if (N->getOpcode() == ISD::SETCC || 7219 N->getOpcode() == ISD::SELECT_CC) { 7220 // If we're looking at a comparison, then we need to make sure that the 7221 // high bits (all except for the first) don't affect the result. 7222 ISD::CondCode CC = 7223 cast<CondCodeSDNode>(N->getOperand( 7224 N->getOpcode() == ISD::SETCC ?
2 : 4))->get(); 7225 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 7226 7227 if (ISD::isSignedIntSetCC(CC)) { 7228 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 7229 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 7230 return SDValue(); 7231 } else if (ISD::isUnsignedIntSetCC(CC)) { 7232 if (!DAG.MaskedValueIsZero(N->getOperand(0), 7233 APInt::getHighBitsSet(OpBits, OpBits-1)) || 7234 !DAG.MaskedValueIsZero(N->getOperand(1), 7235 APInt::getHighBitsSet(OpBits, OpBits-1))) 7236 return SDValue(); 7237 } else { 7238 // This is neither a signed nor an unsigned comparison, just make sure 7239 // that the high bits are equal. 7240 APInt Op1Zero, Op1One; 7241 APInt Op2Zero, Op2One; 7242 DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One); 7243 DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One); 7244 7245 // We don't really care about what is known about the first bit (if 7246 // anything), so clear it in all masks prior to comparing them. 7247 Op1Zero.clearBit(0); Op1One.clearBit(0); 7248 Op2Zero.clearBit(0); Op2One.clearBit(0); 7249 7250 if (Op1Zero != Op2Zero || Op1One != Op2One) 7251 return SDValue(); 7252 } 7253 } 7254 7255 // We now know that the higher-order bits are irrelevant, we just need to 7256 // make sure that all of the intermediate operations are bit operations, and 7257 // all inputs are extensions. 7258 if (N->getOperand(0).getOpcode() != ISD::AND && 7259 N->getOperand(0).getOpcode() != ISD::OR && 7260 N->getOperand(0).getOpcode() != ISD::XOR && 7261 N->getOperand(0).getOpcode() != ISD::SELECT && 7262 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 7263 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 7264 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 7265 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 7266 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 7267 return SDValue(); 7268 7269 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 7270 N->getOperand(1).getOpcode() != ISD::AND && 7271 N->getOperand(1).getOpcode() != ISD::OR && 7272 N->getOperand(1).getOpcode() != ISD::XOR && 7273 N->getOperand(1).getOpcode() != ISD::SELECT && 7274 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 7275 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 7276 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 7277 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 7278 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 7279 return SDValue(); 7280 7281 SmallVector<SDValue, 4> Inputs; 7282 SmallVector<SDValue, 8> BinOps, PromOps; 7283 SmallPtrSet<SDNode *, 16> Visited; 7284 7285 for (unsigned i = 0; i < 2; ++i) { 7286 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 7287 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 7288 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 7289 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 7290 isa<ConstantSDNode>(N->getOperand(i))) 7291 Inputs.push_back(N->getOperand(i)); 7292 else 7293 BinOps.push_back(N->getOperand(i)); 7294 7295 if (N->getOpcode() == ISD::TRUNCATE) 7296 break; 7297 } 7298 7299 // Visit all inputs, collect all binary operations (and, or, xor and 7300 // select) that are all fed by extensions. 7301 while (!BinOps.empty()) { 7302 SDValue BinOp = BinOps.back(); 7303 BinOps.pop_back(); 7304 7305 if (!Visited.insert(BinOp.getNode())) 7306 continue; 7307 7308 PromOps.push_back(BinOp); 7309 7310 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 7311 // The condition of the select is not promoted. 
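// (For SELECT_CC, operands 0 and 1 are the values being compared and operand 4
// is the condition code; only the true/false values in operands 2 and 3 are
// candidates for promotion.)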
7312 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 7313 continue; 7314 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 7315 continue; 7316 7317 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 7318 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 7319 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 7320 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 7321 isa<ConstantSDNode>(BinOp.getOperand(i))) { 7322 Inputs.push_back(BinOp.getOperand(i)); 7323 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 7324 BinOp.getOperand(i).getOpcode() == ISD::OR || 7325 BinOp.getOperand(i).getOpcode() == ISD::XOR || 7326 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 7327 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 7328 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 7329 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 7330 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 7331 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 7332 BinOps.push_back(BinOp.getOperand(i)); 7333 } else { 7334 // We have an input that is not an extension or another binary 7335 // operation; we'll abort this transformation. 7336 return SDValue(); 7337 } 7338 } 7339 } 7340 7341 // Make sure that this is a self-contained cluster of operations (which 7342 // is not quite the same thing as saying that everything has only one 7343 // use). 7344 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 7345 if (isa<ConstantSDNode>(Inputs[i])) 7346 continue; 7347 7348 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 7349 UE = Inputs[i].getNode()->use_end(); 7350 UI != UE; ++UI) { 7351 SDNode *User = *UI; 7352 if (User != N && !Visited.count(User)) 7353 return SDValue(); 7354 7355 // Make sure that we're not going to promote the non-output-value 7356 // operand(s) or SELECT or SELECT_CC. 7357 // FIXME: Although we could sometimes handle this, and it does occur in 7358 // practice that one of the condition inputs to the select is also one of 7359 // the outputs, we currently can't deal with this. 7360 if (User->getOpcode() == ISD::SELECT) { 7361 if (User->getOperand(0) == Inputs[i]) 7362 return SDValue(); 7363 } else if (User->getOpcode() == ISD::SELECT_CC) { 7364 if (User->getOperand(0) == Inputs[i] || 7365 User->getOperand(1) == Inputs[i]) 7366 return SDValue(); 7367 } 7368 } 7369 } 7370 7371 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 7372 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 7373 UE = PromOps[i].getNode()->use_end(); 7374 UI != UE; ++UI) { 7375 SDNode *User = *UI; 7376 if (User != N && !Visited.count(User)) 7377 return SDValue(); 7378 7379 // Make sure that we're not going to promote the non-output-value 7380 // operand(s) or SELECT or SELECT_CC. 7381 // FIXME: Although we could sometimes handle this, and it does occur in 7382 // practice that one of the condition inputs to the select is also one of 7383 // the outputs, we currently can't deal with this. 7384 if (User->getOpcode() == ISD::SELECT) { 7385 if (User->getOperand(0) == PromOps[i]) 7386 return SDValue(); 7387 } else if (User->getOpcode() == ISD::SELECT_CC) { 7388 if (User->getOperand(0) == PromOps[i] || 7389 User->getOperand(1) == PromOps[i]) 7390 return SDValue(); 7391 } 7392 } 7393 } 7394 7395 // Replace all inputs with the extension operand. 
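  // That is, rewrite every user of sext/zext/aext(x) to use the underlying
  // i1 value x directly; the extension nodes themselves become dead.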
7396 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 7397 // Constants may have users outside the cluster of to-be-promoted nodes, 7398 // and so we need to replace those as we do the promotions. 7399 if (isa<ConstantSDNode>(Inputs[i])) 7400 continue; 7401 else 7402 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 7403 } 7404 7405 // Replace all operations (these are all the same, but have a different 7406 // (i1) return type). DAG.getNode will validate that the types of 7407 // a binary operator match, so go through the list in reverse so that 7408 // we've likely promoted both operands first. Any intermediate truncations or 7409 // extensions disappear. 7410 while (!PromOps.empty()) { 7411 SDValue PromOp = PromOps.back(); 7412 PromOps.pop_back(); 7413 7414 if (PromOp.getOpcode() == ISD::TRUNCATE || 7415 PromOp.getOpcode() == ISD::SIGN_EXTEND || 7416 PromOp.getOpcode() == ISD::ZERO_EXTEND || 7417 PromOp.getOpcode() == ISD::ANY_EXTEND) { 7418 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 7419 PromOp.getOperand(0).getValueType() != MVT::i1) { 7420 // The operand is not yet ready (see comment below). 7421 PromOps.insert(PromOps.begin(), PromOp); 7422 continue; 7423 } 7424 7425 SDValue RepValue = PromOp.getOperand(0); 7426 if (isa<ConstantSDNode>(RepValue)) 7427 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 7428 7429 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 7430 continue; 7431 } 7432 7433 unsigned C; 7434 switch (PromOp.getOpcode()) { 7435 default: C = 0; break; 7436 case ISD::SELECT: C = 1; break; 7437 case ISD::SELECT_CC: C = 2; break; 7438 } 7439 7440 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 7441 PromOp.getOperand(C).getValueType() != MVT::i1) || 7442 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 7443 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 7444 // The to-be-promoted operands of this node have not yet been 7445 // promoted (this should be rare because we're going through the 7446 // list backward, but if one of the operands has several users in 7447 // this cluster of to-be-promoted nodes, it is possible). 7448 PromOps.insert(PromOps.begin(), PromOp); 7449 continue; 7450 } 7451 7452 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 7453 PromOp.getNode()->op_end()); 7454 7455 // If there are any constant inputs, make sure they're replaced now. 7456 for (unsigned i = 0; i < 2; ++i) 7457 if (isa<ConstantSDNode>(Ops[C+i])) 7458 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 7459 7460 DAG.ReplaceAllUsesOfValueWith(PromOp, 7461 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, 7462 Ops.data(), Ops.size())); 7463 } 7464 7465 // Now we're left with the initial truncation itself. 7466 if (N->getOpcode() == ISD::TRUNCATE) 7467 return N->getOperand(0); 7468 7469 // Otherwise, this is a comparison. The operands to be compared have just 7470 // changed type (to i1), but everything else is the same. 7471 return SDValue(N, 0); 7472 } 7473 7474 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 7475 DAGCombinerInfo &DCI) const { 7476 SelectionDAG &DAG = DCI.DAG; 7477 SDLoc dl(N); 7478 7479 // If we're tracking CR bits, we need to be careful that we don't have: 7480 // zext(binary-ops(trunc(x), trunc(y))) 7481 // or 7482 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 7483 // such that we're unnecessarily moving things into CR bits that can more 7484 // efficiently stay in GPRs. 
Note that if we're not certain that the high 7485 // bits are set as required by the final extension, we still may need to do 7486 // some masking to get the proper behavior. 7487 7488 // This same functionality is important on PPC64 when dealing with 7489 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 7490 // the return values of functions. Because it is so similar, it is handled 7491 // here as well. 7492 7493 if (N->getValueType(0) != MVT::i32 && 7494 N->getValueType(0) != MVT::i64) 7495 return SDValue(); 7496 7497 if (!((N->getOperand(0).getValueType() == MVT::i1 && 7498 PPCSubTarget.useCRBits()) || 7499 (N->getOperand(0).getValueType() == MVT::i32 && 7500 PPCSubTarget.isPPC64()))) 7501 return SDValue(); 7502 7503 if (N->getOperand(0).getOpcode() != ISD::AND && 7504 N->getOperand(0).getOpcode() != ISD::OR && 7505 N->getOperand(0).getOpcode() != ISD::XOR && 7506 N->getOperand(0).getOpcode() != ISD::SELECT && 7507 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 7508 return SDValue(); 7509 7510 SmallVector<SDValue, 4> Inputs; 7511 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 7512 SmallPtrSet<SDNode *, 16> Visited; 7513 7514 // Visit all inputs, collect all binary operations (and, or, xor and 7515 // select) that are all fed by truncations. 7516 while (!BinOps.empty()) { 7517 SDValue BinOp = BinOps.back(); 7518 BinOps.pop_back(); 7519 7520 if (!Visited.insert(BinOp.getNode())) 7521 continue; 7522 7523 PromOps.push_back(BinOp); 7524 7525 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 7526 // The condition of the select is not promoted. 7527 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 7528 continue; 7529 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 7530 continue; 7531 7532 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 7533 isa<ConstantSDNode>(BinOp.getOperand(i))) { 7534 Inputs.push_back(BinOp.getOperand(i)); 7535 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 7536 BinOp.getOperand(i).getOpcode() == ISD::OR || 7537 BinOp.getOperand(i).getOpcode() == ISD::XOR || 7538 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 7539 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 7540 BinOps.push_back(BinOp.getOperand(i)); 7541 } else { 7542 // We have an input that is not a truncation or another binary 7543 // operation; we'll abort this transformation. 7544 return SDValue(); 7545 } 7546 } 7547 } 7548 7549 // Make sure that this is a self-contained cluster of operations (which 7550 // is not quite the same thing as saying that everything has only one 7551 // use). 7552 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 7553 if (isa<ConstantSDNode>(Inputs[i])) 7554 continue; 7555 7556 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 7557 UE = Inputs[i].getNode()->use_end(); 7558 UI != UE; ++UI) { 7559 SDNode *User = *UI; 7560 if (User != N && !Visited.count(User)) 7561 return SDValue(); 7562 7563 // Make sure that we're not going to promote the non-output-value 7564 // operand(s) or SELECT or SELECT_CC. 7565 // FIXME: Although we could sometimes handle this, and it does occur in 7566 // practice that one of the condition inputs to the select is also one of 7567 // the outputs, we currently can't deal with this. 
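      // Consequently, reject the transformation if any SELECT/SELECT_CC user
      // consumes one of our inputs as its condition (or comparison) operand.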
7568 if (User->getOpcode() == ISD::SELECT) { 7569 if (User->getOperand(0) == Inputs[i]) 7570 return SDValue(); 7571 } else if (User->getOpcode() == ISD::SELECT_CC) { 7572 if (User->getOperand(0) == Inputs[i] || 7573 User->getOperand(1) == Inputs[i]) 7574 return SDValue(); 7575 } 7576 } 7577 } 7578 7579 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 7580 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 7581 UE = PromOps[i].getNode()->use_end(); 7582 UI != UE; ++UI) { 7583 SDNode *User = *UI; 7584 if (User != N && !Visited.count(User)) 7585 return SDValue(); 7586 7587 // Make sure that we're not going to promote the non-output-value 7588 // operand(s) or SELECT or SELECT_CC. 7589 // FIXME: Although we could sometimes handle this, and it does occur in 7590 // practice that one of the condition inputs to the select is also one of 7591 // the outputs, we currently can't deal with this. 7592 if (User->getOpcode() == ISD::SELECT) { 7593 if (User->getOperand(0) == PromOps[i]) 7594 return SDValue(); 7595 } else if (User->getOpcode() == ISD::SELECT_CC) { 7596 if (User->getOperand(0) == PromOps[i] || 7597 User->getOperand(1) == PromOps[i]) 7598 return SDValue(); 7599 } 7600 } 7601 } 7602 7603 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 7604 bool ReallyNeedsExt = false; 7605 if (N->getOpcode() != ISD::ANY_EXTEND) { 7606 // If all of the inputs are not already sign/zero extended, then 7607 // we'll still need to do that at the end. 7608 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 7609 if (isa<ConstantSDNode>(Inputs[i])) 7610 continue; 7611 7612 unsigned OpBits = 7613 Inputs[i].getOperand(0).getValueSizeInBits(); 7614 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 7615 7616 if ((N->getOpcode() == ISD::ZERO_EXTEND && 7617 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 7618 APInt::getHighBitsSet(OpBits, 7619 OpBits-PromBits))) || 7620 (N->getOpcode() == ISD::SIGN_EXTEND && 7621 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 7622 (OpBits-(PromBits-1)))) { 7623 ReallyNeedsExt = true; 7624 break; 7625 } 7626 } 7627 } 7628 7629 // Replace all inputs, either with the truncation operand, or a 7630 // truncation or extension to the final output type. 7631 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 7632 // Constant inputs need to be replaced with the to-be-promoted nodes that 7633 // use them because they might have users outside of the cluster of 7634 // promoted nodes. 7635 if (isa<ConstantSDNode>(Inputs[i])) 7636 continue; 7637 7638 SDValue InSrc = Inputs[i].getOperand(0); 7639 if (Inputs[i].getValueType() == N->getValueType(0)) 7640 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 7641 else if (N->getOpcode() == ISD::SIGN_EXTEND) 7642 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 7643 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 7644 else if (N->getOpcode() == ISD::ZERO_EXTEND) 7645 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 7646 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 7647 else 7648 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 7649 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 7650 } 7651 7652 // Replace all operations (these are all the same, but have a different 7653 // (promoted) return type). DAG.getNode will validate that the types of 7654 // a binary operator match, so go through the list in reverse so that 7655 // we've likely promoted both operands first. 
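  // Operations whose to-be-promoted operands are not ready yet are pushed
  // back onto the front of the list and retried on a later iteration.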
7656 while (!PromOps.empty()) { 7657 SDValue PromOp = PromOps.back(); 7658 PromOps.pop_back(); 7659 7660 unsigned C; 7661 switch (PromOp.getOpcode()) { 7662 default: C = 0; break; 7663 case ISD::SELECT: C = 1; break; 7664 case ISD::SELECT_CC: C = 2; break; 7665 } 7666 7667 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 7668 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 7669 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 7670 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 7671 // The to-be-promoted operands of this node have not yet been 7672 // promoted (this should be rare because we're going through the 7673 // list backward, but if one of the operands has several users in 7674 // this cluster of to-be-promoted nodes, it is possible). 7675 PromOps.insert(PromOps.begin(), PromOp); 7676 continue; 7677 } 7678 7679 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 7680 PromOp.getNode()->op_end()); 7681 7682 // If this node has constant inputs, then they'll need to be promoted here. 7683 for (unsigned i = 0; i < 2; ++i) { 7684 if (!isa<ConstantSDNode>(Ops[C+i])) 7685 continue; 7686 if (Ops[C+i].getValueType() == N->getValueType(0)) 7687 continue; 7688 7689 if (N->getOpcode() == ISD::SIGN_EXTEND) 7690 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 7691 else if (N->getOpcode() == ISD::ZERO_EXTEND) 7692 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 7693 else 7694 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 7695 } 7696 7697 DAG.ReplaceAllUsesOfValueWith(PromOp, 7698 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), 7699 Ops.data(), Ops.size())); 7700 } 7701 7702 // Now we're left with the initial extension itself. 7703 if (!ReallyNeedsExt) 7704 return N->getOperand(0); 7705 7706 // To zero extend, just mask off everything except for the first bit (in the 7707 // i1 case). 7708 if (N->getOpcode() == ISD::ZERO_EXTEND) 7709 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), 7710 DAG.getConstant(APInt::getLowBitsSet( 7711 N->getValueSizeInBits(0), PromBits), 7712 N->getValueType(0))); 7713 7714 assert(N->getOpcode() == ISD::SIGN_EXTEND && 7715 "Invalid extension type"); 7716 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); 7717 SDValue ShiftCst = 7718 DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy); 7719 return DAG.getNode(ISD::SRA, dl, N->getValueType(0), 7720 DAG.getNode(ISD::SHL, dl, N->getValueType(0), 7721 N->getOperand(0), ShiftCst), ShiftCst); 7722 } 7723 7724 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 7725 DAGCombinerInfo &DCI) const { 7726 const TargetMachine &TM = getTargetMachine(); 7727 SelectionDAG &DAG = DCI.DAG; 7728 SDLoc dl(N); 7729 switch (N->getOpcode()) { 7730 default: break; 7731 case PPCISD::SHL: 7732 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 7733 if (C->isNullValue()) // 0 << V -> 0. 7734 return N->getOperand(0); 7735 } 7736 break; 7737 case PPCISD::SRL: 7738 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 7739 if (C->isNullValue()) // 0 >>u V -> 0. 7740 return N->getOperand(0); 7741 } 7742 break; 7743 case PPCISD::SRA: 7744 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 7745 if (C->isNullValue() || // 0 >>s V -> 0. 7746 C->isAllOnesValue()) // -1 >>s V -> -1. 
7747 return N->getOperand(0); 7748 } 7749 break; 7750 case ISD::SIGN_EXTEND: 7751 case ISD::ZERO_EXTEND: 7752 case ISD::ANY_EXTEND: 7753 return DAGCombineExtBoolTrunc(N, DCI); 7754 case ISD::TRUNCATE: 7755 case ISD::SETCC: 7756 case ISD::SELECT_CC: 7757 return DAGCombineTruncBoolExt(N, DCI); 7758 case ISD::FDIV: { 7759 assert(TM.Options.UnsafeFPMath && 7760 "Reciprocal estimates require UnsafeFPMath"); 7761 7762 if (N->getOperand(1).getOpcode() == ISD::FSQRT) { 7763 SDValue RV = 7764 DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI); 7765 if (RV.getNode() != 0) { 7766 DCI.AddToWorklist(RV.getNode()); 7767 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 7768 N->getOperand(0), RV); 7769 } 7770 } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND && 7771 N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { 7772 SDValue RV = 7773 DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), 7774 DCI); 7775 if (RV.getNode() != 0) { 7776 DCI.AddToWorklist(RV.getNode()); 7777 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)), 7778 N->getValueType(0), RV); 7779 DCI.AddToWorklist(RV.getNode()); 7780 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 7781 N->getOperand(0), RV); 7782 } 7783 } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND && 7784 N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { 7785 SDValue RV = 7786 DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), 7787 DCI); 7788 if (RV.getNode() != 0) { 7789 DCI.AddToWorklist(RV.getNode()); 7790 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)), 7791 N->getValueType(0), RV, 7792 N->getOperand(1).getOperand(1)); 7793 DCI.AddToWorklist(RV.getNode()); 7794 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 7795 N->getOperand(0), RV); 7796 } 7797 } 7798 7799 SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI); 7800 if (RV.getNode() != 0) { 7801 DCI.AddToWorklist(RV.getNode()); 7802 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 7803 N->getOperand(0), RV); 7804 } 7805 7806 } 7807 break; 7808 case ISD::FSQRT: { 7809 assert(TM.Options.UnsafeFPMath && 7810 "Reciprocal estimates require UnsafeFPMath"); 7811 7812 // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the 7813 // reciprocal sqrt. 7814 SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI); 7815 if (RV.getNode() != 0) { 7816 DCI.AddToWorklist(RV.getNode()); 7817 RV = DAGCombineFastRecip(RV, DCI); 7818 if (RV.getNode() != 0) { 7819 // Unfortunately, RV is now NaN if the input was exactly 0. Select out 7820 // this case and force the answer to 0. 7821 7822 EVT VT = RV.getValueType(); 7823 7824 SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType()); 7825 if (VT.isVector()) { 7826 assert(VT.getVectorNumElements() == 4 && "Unknown vector type"); 7827 Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero); 7828 } 7829 7830 SDValue ZeroCmp = 7831 DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT), 7832 N->getOperand(0), Zero, ISD::SETEQ); 7833 DCI.AddToWorklist(ZeroCmp.getNode()); 7834 DCI.AddToWorklist(RV.getNode()); 7835 7836 RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT, 7837 ZeroCmp, Zero, RV); 7838 return RV; 7839 } 7840 } 7841 7842 } 7843 break; 7844 case ISD::SINT_TO_FP: 7845 if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { 7846 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { 7847 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. 
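        // (fctidz/fcfid keep the intermediate i64 value in a floating-point
        // register, so the round trip never has to go through memory or a GPR.)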
7848 // We allow the src/dst to be either f32/f64, but the intermediate 7849 // type must be i64. 7850 if (N->getOperand(0).getValueType() == MVT::i64 && 7851 N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { 7852 SDValue Val = N->getOperand(0).getOperand(0); 7853 if (Val.getValueType() == MVT::f32) { 7854 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 7855 DCI.AddToWorklist(Val.getNode()); 7856 } 7857 7858 Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val); 7859 DCI.AddToWorklist(Val.getNode()); 7860 Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val); 7861 DCI.AddToWorklist(Val.getNode()); 7862 if (N->getValueType(0) == MVT::f32) { 7863 Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val, 7864 DAG.getIntPtrConstant(0)); 7865 DCI.AddToWorklist(Val.getNode()); 7866 } 7867 return Val; 7868 } else if (N->getOperand(0).getValueType() == MVT::i32) { 7869 // If the intermediate type is i32, we can avoid the load/store here 7870 // too. 7871 } 7872 } 7873 } 7874 break; 7875 case ISD::STORE: 7876 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 7877 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 7878 !cast<StoreSDNode>(N)->isTruncatingStore() && 7879 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 7880 N->getOperand(1).getValueType() == MVT::i32 && 7881 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 7882 SDValue Val = N->getOperand(1).getOperand(0); 7883 if (Val.getValueType() == MVT::f32) { 7884 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 7885 DCI.AddToWorklist(Val.getNode()); 7886 } 7887 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 7888 DCI.AddToWorklist(Val.getNode()); 7889 7890 SDValue Ops[] = { 7891 N->getOperand(0), Val, N->getOperand(2), 7892 DAG.getValueType(N->getOperand(1).getValueType()) 7893 }; 7894 7895 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 7896 DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), 7897 cast<StoreSDNode>(N)->getMemoryVT(), 7898 cast<StoreSDNode>(N)->getMemOperand()); 7899 DCI.AddToWorklist(Val.getNode()); 7900 return Val; 7901 } 7902 7903 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 7904 if (cast<StoreSDNode>(N)->isUnindexed() && 7905 N->getOperand(1).getOpcode() == ISD::BSWAP && 7906 N->getOperand(1).getNode()->hasOneUse() && 7907 (N->getOperand(1).getValueType() == MVT::i32 || 7908 N->getOperand(1).getValueType() == MVT::i16 || 7909 (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && 7910 TM.getSubtarget<PPCSubtarget>().isPPC64() && 7911 N->getOperand(1).getValueType() == MVT::i64))) { 7912 SDValue BSwapOp = N->getOperand(1).getOperand(0); 7913 // Do an any-extend to 32-bits if this is a half-word input. 7914 if (BSwapOp.getValueType() == MVT::i16) 7915 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 7916 7917 SDValue Ops[] = { 7918 N->getOperand(0), BSwapOp, N->getOperand(2), 7919 DAG.getValueType(N->getOperand(1).getValueType()) 7920 }; 7921 return 7922 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 7923 Ops, array_lengthof(Ops), 7924 cast<StoreSDNode>(N)->getMemoryVT(), 7925 cast<StoreSDNode>(N)->getMemOperand()); 7926 } 7927 break; 7928 case ISD::LOAD: { 7929 LoadSDNode *LD = cast<LoadSDNode>(N); 7930 EVT VT = LD->getValueType(0); 7931 Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); 7932 unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); 7933 if (ISD::isNON_EXTLoad(N) && VT.isVector() && 7934 TM.getSubtarget<PPCSubtarget>().hasAltivec() && 7935 // FIXME: Update this for VSX! 
            (VT == MVT::v16i8 || VT == MVT::v8i16 ||
             VT == MVT::v4i32 || VT == MVT::v4f32) &&
            LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl),
      // a series of regular vector loads (which always truncate their
      // input address to an aligned address), and a series of permutations.
      // The results of these permutations are the requested loaded values.
      // The trick is that the last "extra" load is not taken from the address
      // you might suspect (sizeof(vector) bytes after the last requested
      // load), but rather sizeof(vector) - 1 bytes after the last
      // requested vector. The point of this is to avoid a page fault if the
      // base address happens to be aligned. This works because if the base
      // address is aligned, then adding less than a full vector length will
      // cause the last vector in the sequence to be (re)loaded. Otherwise,
      // the next vector is fetched from the address you would expect.

      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
                                          DAG, dl, MVT::v16i8);

      // Refine the alignment of the original load (a "new" load created here,
      // identical to the original except for its alignment, would simply be
      // merged with the existing node anyway).
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(LD->getPointerInfo(),
                                LD->getMemOperand()->getFlags(),
                                LD->getMemoryVT().getStoreSize(),
                                ABIAlignment);
      LD->refineAlignment(MMO);
      SDValue BaseLoad = SDValue(LD, 0);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
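      // When no such load exists, fetch the "extra" vector starting at
      // (base + vector-size - 1); the aligned vector loads truncate their
      // address, so this stays on the original page when the base pointer
      // happens to be aligned already.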
7995 if (!findConsecutiveLoad(LD, DAG)) 7996 --IncValue; 7997 7998 SDValue Increment = DAG.getConstant(IncValue, getPointerTy()); 7999 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 8000 8001 SDValue ExtraLoad = 8002 DAG.getLoad(VT, dl, Chain, Ptr, 8003 LD->getPointerInfo().getWithOffset(IncOffset), 8004 LD->isVolatile(), LD->isNonTemporal(), 8005 LD->isInvariant(), ABIAlignment); 8006 8007 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 8008 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 8009 8010 if (BaseLoad.getValueType() != MVT::v4i32) 8011 BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad); 8012 8013 if (ExtraLoad.getValueType() != MVT::v4i32) 8014 ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad); 8015 8016 SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, 8017 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 8018 8019 if (VT != MVT::v4i32) 8020 Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); 8021 8022 // Now we need to be really careful about how we update the users of the 8023 // original load. We cannot just call DCI.CombineTo (or 8024 // DAG.ReplaceAllUsesWith for that matter), because the load still has 8025 // uses created here (the permutation for example) that need to stay. 8026 SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 8027 while (UI != UE) { 8028 SDUse &Use = UI.getUse(); 8029 SDNode *User = *UI; 8030 // Note: BaseLoad is checked here because it might not be N, but a 8031 // bitcast of N. 8032 if (User == Perm.getNode() || User == BaseLoad.getNode() || 8033 User == TF.getNode() || Use.getResNo() > 1) { 8034 ++UI; 8035 continue; 8036 } 8037 8038 SDValue To = Use.getResNo() ? TF : Perm; 8039 ++UI; 8040 8041 SmallVector<SDValue, 8> Ops; 8042 for (SDNode::op_iterator O = User->op_begin(), 8043 OE = User->op_end(); O != OE; ++O) { 8044 if (*O == Use) 8045 Ops.push_back(To); 8046 else 8047 Ops.push_back(*O); 8048 } 8049 8050 DAG.UpdateNodeOperands(User, Ops.data(), Ops.size()); 8051 } 8052 8053 return SDValue(N, 0); 8054 } 8055 } 8056 break; 8057 case ISD::INTRINSIC_WO_CHAIN: 8058 if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == 8059 Intrinsic::ppc_altivec_lvsl && 8060 N->getOperand(1)->getOpcode() == ISD::ADD) { 8061 SDValue Add = N->getOperand(1); 8062 8063 if (DAG.MaskedValueIsZero(Add->getOperand(1), 8064 APInt::getAllOnesValue(4 /* 16 byte alignment */).zext( 8065 Add.getValueType().getScalarType().getSizeInBits()))) { 8066 SDNode *BasePtr = Add->getOperand(0).getNode(); 8067 for (SDNode::use_iterator UI = BasePtr->use_begin(), 8068 UE = BasePtr->use_end(); UI != UE; ++UI) { 8069 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 8070 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == 8071 Intrinsic::ppc_altivec_lvsl) { 8072 // We've found another LVSL, and this address if an aligned 8073 // multiple of that one. The results will be the same, so use the 8074 // one we've just found instead. 8075 8076 return SDValue(*UI, 0); 8077 } 8078 } 8079 } 8080 } 8081 8082 break; 8083 case ISD::BSWAP: 8084 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 8085 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 8086 N->getOperand(0).hasOneUse() && 8087 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 8088 (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && 8089 TM.getSubtarget<PPCSubtarget>().isPPC64() && 8090 N->getValueType(0) == MVT::i64))) { 8091 SDValue Load = N->getOperand(0); 8092 LoadSDNode *LD = cast<LoadSDNode>(Load); 8093 // Create the byte-swapping load. 
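      // The extra value-type operand tells instruction selection how wide a
      // byte-swapping load to emit (lhbrx, lwbrx, or, where available, ldbrx).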
8094 SDValue Ops[] = { 8095 LD->getChain(), // Chain 8096 LD->getBasePtr(), // Ptr 8097 DAG.getValueType(N->getValueType(0)) // VT 8098 }; 8099 SDValue BSLoad = 8100 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 8101 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 8102 MVT::i64 : MVT::i32, MVT::Other), 8103 Ops, 3, LD->getMemoryVT(), LD->getMemOperand()); 8104 8105 // If this is an i16 load, insert the truncate. 8106 SDValue ResVal = BSLoad; 8107 if (N->getValueType(0) == MVT::i16) 8108 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 8109 8110 // First, combine the bswap away. This makes the value produced by the 8111 // load dead. 8112 DCI.CombineTo(N, ResVal); 8113 8114 // Next, combine the load away, we give it a bogus result value but a real 8115 // chain result. The result value is dead because the bswap is dead. 8116 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 8117 8118 // Return N so it doesn't get rechecked! 8119 return SDValue(N, 0); 8120 } 8121 8122 break; 8123 case PPCISD::VCMP: { 8124 // If a VCMPo node already exists with exactly the same operands as this 8125 // node, use its result instead of this node (VCMPo computes both a CR6 and 8126 // a normal output). 8127 // 8128 if (!N->getOperand(0).hasOneUse() && 8129 !N->getOperand(1).hasOneUse() && 8130 !N->getOperand(2).hasOneUse()) { 8131 8132 // Scan all of the users of the LHS, looking for VCMPo's that match. 8133 SDNode *VCMPoNode = 0; 8134 8135 SDNode *LHSN = N->getOperand(0).getNode(); 8136 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 8137 UI != E; ++UI) 8138 if (UI->getOpcode() == PPCISD::VCMPo && 8139 UI->getOperand(1) == N->getOperand(1) && 8140 UI->getOperand(2) == N->getOperand(2) && 8141 UI->getOperand(0) == N->getOperand(0)) { 8142 VCMPoNode = *UI; 8143 break; 8144 } 8145 8146 // If there is no VCMPo node, or if the flag value has a single use, don't 8147 // transform this. 8148 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 8149 break; 8150 8151 // Look at the (necessarily single) use of the flag value. If it has a 8152 // chain, this transformation is more complex. Note that multiple things 8153 // could use the value result, which we should ignore. 8154 SDNode *FlagUser = 0; 8155 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 8156 FlagUser == 0; ++UI) { 8157 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 8158 SDNode *User = *UI; 8159 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 8160 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 8161 FlagUser = User; 8162 break; 8163 } 8164 } 8165 } 8166 8167 // If the user is a MFOCRF instruction, we know this is safe. 8168 // Otherwise we give up for right now. 8169 if (FlagUser->getOpcode() == PPCISD::MFOCRF) 8170 return SDValue(VCMPoNode, 0); 8171 } 8172 break; 8173 } 8174 case ISD::BRCOND: { 8175 SDValue Cond = N->getOperand(1); 8176 SDValue Target = N->getOperand(2); 8177 8178 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && 8179 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == 8180 Intrinsic::ppc_is_decremented_ctr_nonzero) { 8181 8182 // We now need to make the intrinsic dead (it cannot be instruction 8183 // selected). 
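      // Splice the intrinsic's chain result out of the chain: users of that
      // chain are redirected to the intrinsic's input chain, which should
      // leave this branch as the only remaining user (checked by the assert
      // below).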
8184 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); 8185 assert(Cond.getNode()->hasOneUse() && 8186 "Counter decrement has more than one use"); 8187 8188 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, 8189 N->getOperand(0), Target); 8190 } 8191 } 8192 break; 8193 case ISD::BR_CC: { 8194 // If this is a branch on an altivec predicate comparison, lower this so 8195 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This 8196 // lowering is done pre-legalize, because the legalizer lowers the predicate 8197 // compare down to code that is difficult to reassemble. 8198 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 8199 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 8200 8201 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 8202 // value. If so, pass-through the AND to get to the intrinsic. 8203 if (LHS.getOpcode() == ISD::AND && 8204 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 8205 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 8206 Intrinsic::ppc_is_decremented_ctr_nonzero && 8207 isa<ConstantSDNode>(LHS.getOperand(1)) && 8208 !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()-> 8209 isZero()) 8210 LHS = LHS.getOperand(0); 8211 8212 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 8213 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 8214 Intrinsic::ppc_is_decremented_ctr_nonzero && 8215 isa<ConstantSDNode>(RHS)) { 8216 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 8217 "Counter decrement comparison is not EQ or NE"); 8218 8219 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 8220 bool isBDNZ = (CC == ISD::SETEQ && Val) || 8221 (CC == ISD::SETNE && !Val); 8222 8223 // We now need to make the intrinsic dead (it cannot be instruction 8224 // selected). 8225 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 8226 assert(LHS.getNode()->hasOneUse() && 8227 "Counter decrement has more than one use"); 8228 8229 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 8230 N->getOperand(0), N->getOperand(4)); 8231 } 8232 8233 int CompareOpc; 8234 bool isDot; 8235 8236 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 8237 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 8238 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 8239 assert(isDot && "Can't compare against a vector result!"); 8240 8241 // If this is a comparison against something other than 0/1, then we know 8242 // that the condition is never/always true. 8243 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 8244 if (Val != 0 && Val != 1) { 8245 if (CC == ISD::SETEQ) // Cond never true, remove branch. 8246 return N->getOperand(0); 8247 // Always !=, turn it into an unconditional branch. 8248 return DAG.getNode(ISD::BR, dl, MVT::Other, 8249 N->getOperand(0), N->getOperand(4)); 8250 } 8251 8252 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 8253 8254 // Create the PPCISD altivec 'dot' comparison node. 8255 SDValue Ops[] = { 8256 LHS.getOperand(2), // LHS of compare 8257 LHS.getOperand(3), // RHS of compare 8258 DAG.getConstant(CompareOpc, MVT::i32) 8259 }; 8260 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 8261 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 8262 8263 // Unpack the result based on how the target uses it. 
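      // Operand 1 of the predicate intrinsic is an immediate selecting which
      // CR6 bit the dot-form compare sets (EQ or LT) and whether its sense is
      // inverted; map that onto the corresponding branch predicate.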
8264 PPC::Predicate CompOpc; 8265 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 8266 default: // Can't happen, don't crash on invalid number though. 8267 case 0: // Branch on the value of the EQ bit of CR6. 8268 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 8269 break; 8270 case 1: // Branch on the inverted value of the EQ bit of CR6. 8271 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 8272 break; 8273 case 2: // Branch on the value of the LT bit of CR6. 8274 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 8275 break; 8276 case 3: // Branch on the inverted value of the LT bit of CR6. 8277 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; 8278 break; 8279 } 8280 8281 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 8282 DAG.getConstant(CompOpc, MVT::i32), 8283 DAG.getRegister(PPC::CR6, MVT::i32), 8284 N->getOperand(4), CompNode.getValue(1)); 8285 } 8286 break; 8287 } 8288 } 8289 8290 return SDValue(); 8291 } 8292 8293 //===----------------------------------------------------------------------===// 8294 // Inline Assembly Support 8295 //===----------------------------------------------------------------------===// 8296 8297 void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 8298 APInt &KnownZero, 8299 APInt &KnownOne, 8300 const SelectionDAG &DAG, 8301 unsigned Depth) const { 8302 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 8303 switch (Op.getOpcode()) { 8304 default: break; 8305 case PPCISD::LBRX: { 8306 // lhbrx is known to have the top bits cleared out. 8307 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 8308 KnownZero = 0xFFFF0000; 8309 break; 8310 } 8311 case ISD::INTRINSIC_WO_CHAIN: { 8312 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 8313 default: break; 8314 case Intrinsic::ppc_altivec_vcmpbfp_p: 8315 case Intrinsic::ppc_altivec_vcmpeqfp_p: 8316 case Intrinsic::ppc_altivec_vcmpequb_p: 8317 case Intrinsic::ppc_altivec_vcmpequh_p: 8318 case Intrinsic::ppc_altivec_vcmpequw_p: 8319 case Intrinsic::ppc_altivec_vcmpgefp_p: 8320 case Intrinsic::ppc_altivec_vcmpgtfp_p: 8321 case Intrinsic::ppc_altivec_vcmpgtsb_p: 8322 case Intrinsic::ppc_altivec_vcmpgtsh_p: 8323 case Intrinsic::ppc_altivec_vcmpgtsw_p: 8324 case Intrinsic::ppc_altivec_vcmpgtub_p: 8325 case Intrinsic::ppc_altivec_vcmpgtuh_p: 8326 case Intrinsic::ppc_altivec_vcmpgtuw_p: 8327 KnownZero = ~1U; // All bits but the low one are known to be zero. 8328 break; 8329 } 8330 } 8331 } 8332 } 8333 8334 8335 /// getConstraintType - Given a constraint, return the type of 8336 /// constraint it is for this target. 8337 PPCTargetLowering::ConstraintType 8338 PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 8339 if (Constraint.size() == 1) { 8340 switch (Constraint[0]) { 8341 default: break; 8342 case 'b': 8343 case 'r': 8344 case 'f': 8345 case 'v': 8346 case 'y': 8347 return C_RegisterClass; 8348 case 'Z': 8349 // FIXME: While Z does indicate a memory constraint, it specifically 8350 // indicates an r+r address (used in conjunction with the 'y' modifier 8351 // in the replacement string). Currently, we're forcing the base 8352 // register to be r0 in the asm printer (which is interpreted as zero) 8353 // and forming the complete address in the second register. This is 8354 // suboptimal. 8355 return C_Memory; 8356 } 8357 } else if (Constraint == "wc") { // individual CR bits. 
8358 return C_RegisterClass; 8359 } else if (Constraint == "wa" || Constraint == "wd" || 8360 Constraint == "wf" || Constraint == "ws") { 8361 return C_RegisterClass; // VSX registers. 8362 } 8363 return TargetLowering::getConstraintType(Constraint); 8364 } 8365 8366 /// Examine constraint type and operand type and determine a weight value. 8367 /// This object must already have been set up with the operand type 8368 /// and the current alternative constraint selected. 8369 TargetLowering::ConstraintWeight 8370 PPCTargetLowering::getSingleConstraintMatchWeight( 8371 AsmOperandInfo &info, const char *constraint) const { 8372 ConstraintWeight weight = CW_Invalid; 8373 Value *CallOperandVal = info.CallOperandVal; 8374 // If we don't have a value, we can't do a match, 8375 // but allow it at the lowest weight. 8376 if (CallOperandVal == NULL) 8377 return CW_Default; 8378 Type *type = CallOperandVal->getType(); 8379 8380 // Look at the constraint type. 8381 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 8382 return CW_Register; // an individual CR bit. 8383 else if ((StringRef(constraint) == "wa" || 8384 StringRef(constraint) == "wd" || 8385 StringRef(constraint) == "wf") && 8386 type->isVectorTy()) 8387 return CW_Register; 8388 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 8389 return CW_Register; 8390 8391 switch (*constraint) { 8392 default: 8393 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 8394 break; 8395 case 'b': 8396 if (type->isIntegerTy()) 8397 weight = CW_Register; 8398 break; 8399 case 'f': 8400 if (type->isFloatTy()) 8401 weight = CW_Register; 8402 break; 8403 case 'd': 8404 if (type->isDoubleTy()) 8405 weight = CW_Register; 8406 break; 8407 case 'v': 8408 if (type->isVectorTy()) 8409 weight = CW_Register; 8410 break; 8411 case 'y': 8412 weight = CW_Register; 8413 break; 8414 case 'Z': 8415 weight = CW_Memory; 8416 break; 8417 } 8418 return weight; 8419 } 8420 8421 std::pair<unsigned, const TargetRegisterClass*> 8422 PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 8423 MVT VT) const { 8424 if (Constraint.size() == 1) { 8425 // GCC RS6000 Constraint Letters 8426 switch (Constraint[0]) { 8427 case 'b': // R1-R31 8428 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 8429 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 8430 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 8431 case 'r': // R0-R31 8432 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 8433 return std::make_pair(0U, &PPC::G8RCRegClass); 8434 return std::make_pair(0U, &PPC::GPRCRegClass); 8435 case 'f': 8436 if (VT == MVT::f32 || VT == MVT::i32) 8437 return std::make_pair(0U, &PPC::F4RCRegClass); 8438 if (VT == MVT::f64 || VT == MVT::i64) 8439 return std::make_pair(0U, &PPC::F8RCRegClass); 8440 break; 8441 case 'v': 8442 return std::make_pair(0U, &PPC::VRRCRegClass); 8443 case 'y': // crrc 8444 return std::make_pair(0U, &PPC::CRRCRegClass); 8445 } 8446 } else if (Constraint == "wc") { // an individual CR bit. 8447 return std::make_pair(0U, &PPC::CRBITRCRegClass); 8448 } else if (Constraint == "wa" || Constraint == "wd" || 8449 Constraint == "wf" || Constraint == "ws") { 8450 return std::make_pair(0U, &PPC::VSRCRegClass); 8451 } 8452 8453 std::pair<unsigned, const TargetRegisterClass*> R = 8454 TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 8455 8456 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 8457 // (which we call X[0-9]+). 
If a 64-bit value has been requested, and a 8458 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 8459 // register. 8460 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 8461 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 8462 if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() && 8463 PPC::GPRCRegClass.contains(R.first)) { 8464 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 8465 return std::make_pair(TRI->getMatchingSuperReg(R.first, 8466 PPC::sub_32, &PPC::G8RCRegClass), 8467 &PPC::G8RCRegClass); 8468 } 8469 8470 return R; 8471 } 8472 8473 8474 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 8475 /// vector. If it is invalid, don't add anything to Ops. 8476 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 8477 std::string &Constraint, 8478 std::vector<SDValue>&Ops, 8479 SelectionDAG &DAG) const { 8480 SDValue Result(0,0); 8481 8482 // Only support length 1 constraints. 8483 if (Constraint.length() > 1) return; 8484 8485 char Letter = Constraint[0]; 8486 switch (Letter) { 8487 default: break; 8488 case 'I': 8489 case 'J': 8490 case 'K': 8491 case 'L': 8492 case 'M': 8493 case 'N': 8494 case 'O': 8495 case 'P': { 8496 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 8497 if (!CST) return; // Must be an immediate to match. 8498 unsigned Value = CST->getZExtValue(); 8499 switch (Letter) { 8500 default: llvm_unreachable("Unknown constraint letter!"); 8501 case 'I': // "I" is a signed 16-bit constant. 8502 if ((short)Value == (int)Value) 8503 Result = DAG.getTargetConstant(Value, Op.getValueType()); 8504 break; 8505 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 8506 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 8507 if ((short)Value == 0) 8508 Result = DAG.getTargetConstant(Value, Op.getValueType()); 8509 break; 8510 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 8511 if ((Value >> 16) == 0) 8512 Result = DAG.getTargetConstant(Value, Op.getValueType()); 8513 break; 8514 case 'M': // "M" is a constant that is greater than 31. 8515 if (Value > 31) 8516 Result = DAG.getTargetConstant(Value, Op.getValueType()); 8517 break; 8518 case 'N': // "N" is a positive constant that is an exact power of two. 8519 if ((int)Value > 0 && isPowerOf2_32(Value)) 8520 Result = DAG.getTargetConstant(Value, Op.getValueType()); 8521 break; 8522 case 'O': // "O" is the constant zero. 8523 if (Value == 0) 8524 Result = DAG.getTargetConstant(Value, Op.getValueType()); 8525 break; 8526 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 8527 if ((short)-Value == (int)-Value) 8528 Result = DAG.getTargetConstant(Value, Op.getValueType()); 8529 break; 8530 } 8531 break; 8532 } 8533 } 8534 8535 if (Result.getNode()) { 8536 Ops.push_back(Result); 8537 return; 8538 } 8539 8540 // Handle standard constraint letters. 8541 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 8542 } 8543 8544 // isLegalAddressingMode - Return true if the addressing mode represented 8545 // by AM is legal for this target, for a load/store of the specified type. 8546 bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, 8547 Type *Ty) const { 8548 // FIXME: PPC does not allow r+i addressing modes for vectors! 8549 8550 // PPC allows a sign-extended 16-bit immediate field. 
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing.
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = PPCSubTarget.isPPC64();
  bool isDarwinABI = PPCSubTarget.isDarwinABI();

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
      DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
                      isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                   FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI
  // (prologue/epilogue insertion).
  unsigned FrameReg;
  if (MF.getFunction()->getAttributes().hasAttribute(
        AttributeSet::FunctionIndex, Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

/// getOptimalMemOpType - Returns the target-specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero there is no need to check it
/// against an alignment requirement, probably because the source does not
/// need to be loaded. If 'IsMemset' is true, this is expanding a memset; if
/// 'ZeroMemset' is also true, it is a memset of zero. 'MemcpyStrSrc'
/// indicates whether the memcpy source is constant so it does not need to be
/// loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  return PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
}

bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                      unsigned,
                                                      bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector())
    return false;

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}