1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the PPCISelLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPCISelLowering.h" 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPCMachineFunctionInfo.h" 17 #include "PPCPerfectShuffle.h" 18 #include "PPCTargetMachine.h" 19 #include "PPCTargetObjectFile.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringSwitch.h" 22 #include "llvm/ADT/Triple.h" 23 #include "llvm/CodeGen/CallingConvLower.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/SelectionDAG.h" 29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 30 #include "llvm/IR/CallingConv.h" 31 #include "llvm/IR/Constants.h" 32 #include "llvm/IR/DerivedTypes.h" 33 #include "llvm/IR/Function.h" 34 #include "llvm/IR/Intrinsics.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/MathExtras.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Target/TargetOptions.h" 40 using namespace llvm; 41 42 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", 43 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); 44 45 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", 46 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); 47 48 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", 49 cl::desc("disable unaligned load/store generation on PPC"), 
cl::Hidden); 50 51 // FIXME: Remove this once the bug has been fixed! 52 extern cl::opt<bool> ANDIGlueBug; 53 54 static TargetLoweringObjectFile *createTLOF(const Triple &TT) { 55 // If it isn't a Mach-O file then it's going to be a linux ELF 56 // object file. 57 if (TT.isOSDarwin()) 58 return new TargetLoweringObjectFileMachO(); 59 60 return new PPC64LinuxTargetObjectFile(); 61 } 62 63 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) 64 : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))), 65 Subtarget(*TM.getSubtargetImpl()) { 66 setPow2DivIsCheap(); 67 68 // Use _setjmp/_longjmp instead of setjmp/longjmp. 69 setUseUnderscoreSetJmp(true); 70 setUseUnderscoreLongJmp(true); 71 72 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all 73 // arguments are at least 4/8 bytes aligned. 74 bool isPPC64 = Subtarget.isPPC64(); 75 setMinStackArgumentAlignment(isPPC64 ? 8:4); 76 77 // Set up the register classes. 78 addRegisterClass(MVT::i32, &PPC::GPRCRegClass); 79 addRegisterClass(MVT::f32, &PPC::F4RCRegClass); 80 addRegisterClass(MVT::f64, &PPC::F8RCRegClass); 81 82 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD 83 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 84 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); 85 86 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 87 88 // PowerPC has pre-inc load and store's. 
89 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); 90 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); 91 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); 92 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); 93 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); 94 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); 95 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); 96 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); 97 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); 98 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); 99 100 if (Subtarget.useCRBits()) { 101 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 102 103 if (isPPC64 || Subtarget.hasFPCVT()) { 104 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); 105 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, 106 isPPC64 ? MVT::i64 : MVT::i32); 107 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); 108 AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, 109 isPPC64 ? MVT::i64 : MVT::i32); 110 } else { 111 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); 112 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); 113 } 114 115 // PowerPC does not support direct load / store of condition registers 116 setOperationAction(ISD::LOAD, MVT::i1, Custom); 117 setOperationAction(ISD::STORE, MVT::i1, Custom); 118 119 // FIXME: Remove this once the ANDI glue bug is fixed: 120 if (ANDIGlueBug) 121 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); 122 123 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 124 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); 125 setTruncStoreAction(MVT::i64, MVT::i1, Expand); 126 setTruncStoreAction(MVT::i32, MVT::i1, Expand); 127 setTruncStoreAction(MVT::i16, MVT::i1, Expand); 128 setTruncStoreAction(MVT::i8, MVT::i1, Expand); 129 130 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); 131 } 132 133 // This is used in the ppcf128->int sequence. Note it has different semantics 134 // from FP_ROUND: that rounds to nearest, this rounds to zero. 
135 setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); 136 137 // We do not currently implement these libm ops for PowerPC. 138 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); 139 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); 140 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); 141 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); 142 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); 143 setOperationAction(ISD::FREM, MVT::ppcf128, Expand); 144 145 // PowerPC has no SREM/UREM instructions 146 setOperationAction(ISD::SREM, MVT::i32, Expand); 147 setOperationAction(ISD::UREM, MVT::i32, Expand); 148 setOperationAction(ISD::SREM, MVT::i64, Expand); 149 setOperationAction(ISD::UREM, MVT::i64, Expand); 150 151 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 152 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 153 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 154 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 155 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 156 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 157 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 158 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 159 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 160 161 // We don't support sin/cos/sqrt/fmod/pow 162 setOperationAction(ISD::FSIN , MVT::f64, Expand); 163 setOperationAction(ISD::FCOS , MVT::f64, Expand); 164 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 165 setOperationAction(ISD::FREM , MVT::f64, Expand); 166 setOperationAction(ISD::FPOW , MVT::f64, Expand); 167 setOperationAction(ISD::FMA , MVT::f64, Legal); 168 setOperationAction(ISD::FSIN , MVT::f32, Expand); 169 setOperationAction(ISD::FCOS , MVT::f32, Expand); 170 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 171 setOperationAction(ISD::FREM , MVT::f32, Expand); 172 setOperationAction(ISD::FPOW , MVT::f32, Expand); 173 setOperationAction(ISD::FMA , MVT::f32, Legal); 174 175 
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 176 177 // If we're enabling GP optimizations, use hardware square root 178 if (!Subtarget.hasFSQRT() && 179 !(TM.Options.UnsafeFPMath && 180 Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) 181 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 182 183 if (!Subtarget.hasFSQRT() && 184 !(TM.Options.UnsafeFPMath && 185 Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) 186 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 187 188 if (Subtarget.hasFCPSGN()) { 189 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); 190 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); 191 } else { 192 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 193 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 194 } 195 196 if (Subtarget.hasFPRND()) { 197 setOperationAction(ISD::FFLOOR, MVT::f64, Legal); 198 setOperationAction(ISD::FCEIL, MVT::f64, Legal); 199 setOperationAction(ISD::FTRUNC, MVT::f64, Legal); 200 setOperationAction(ISD::FROUND, MVT::f64, Legal); 201 202 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 203 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 204 setOperationAction(ISD::FTRUNC, MVT::f32, Legal); 205 setOperationAction(ISD::FROUND, MVT::f32, Legal); 206 } 207 208 // PowerPC does not have BSWAP, CTPOP or CTTZ 209 setOperationAction(ISD::BSWAP, MVT::i32 , Expand); 210 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 211 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); 212 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); 213 setOperationAction(ISD::BSWAP, MVT::i64 , Expand); 214 setOperationAction(ISD::CTTZ , MVT::i64 , Expand); 215 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); 216 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); 217 218 if (Subtarget.hasPOPCNTD()) { 219 setOperationAction(ISD::CTPOP, MVT::i32 , Legal); 220 setOperationAction(ISD::CTPOP, MVT::i64 , Legal); 221 } else { 222 setOperationAction(ISD::CTPOP, MVT::i32 , Expand); 223 
setOperationAction(ISD::CTPOP, MVT::i64 , Expand); 224 } 225 226 // PowerPC does not have ROTR 227 setOperationAction(ISD::ROTR, MVT::i32 , Expand); 228 setOperationAction(ISD::ROTR, MVT::i64 , Expand); 229 230 if (!Subtarget.useCRBits()) { 231 // PowerPC does not have Select 232 setOperationAction(ISD::SELECT, MVT::i32, Expand); 233 setOperationAction(ISD::SELECT, MVT::i64, Expand); 234 setOperationAction(ISD::SELECT, MVT::f32, Expand); 235 setOperationAction(ISD::SELECT, MVT::f64, Expand); 236 } 237 238 // PowerPC wants to turn select_cc of FP into fsel when possible. 239 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 240 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 241 242 // PowerPC wants to optimize integer setcc a bit 243 if (!Subtarget.useCRBits()) 244 setOperationAction(ISD::SETCC, MVT::i32, Custom); 245 246 // PowerPC does not have BRCOND which requires SetCC 247 if (!Subtarget.useCRBits()) 248 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 249 250 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 251 252 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 253 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 254 255 // PowerPC does not have [U|S]INT_TO_FP 256 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); 257 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); 258 259 setOperationAction(ISD::BITCAST, MVT::f32, Expand); 260 setOperationAction(ISD::BITCAST, MVT::i32, Expand); 261 setOperationAction(ISD::BITCAST, MVT::i64, Expand); 262 setOperationAction(ISD::BITCAST, MVT::f64, Expand); 263 264 // We cannot sextinreg(i1). Expand to shifts. 265 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 266 267 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support 268 // SjLj exception handling but a light-weight setjmp/longjmp replacement to 269 // support continuation, user-level threading, and etc.. 
As a result, no 270 // other SjLj exception interfaces are implemented and please don't build 271 // your own exception handling based on them. 272 // LLVM/Clang supports zero-cost DWARF exception handling. 273 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 274 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 275 276 // We want to legalize GlobalAddress and ConstantPool nodes into the 277 // appropriate instructions to materialize the address. 278 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 279 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 280 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 281 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 282 setOperationAction(ISD::JumpTable, MVT::i32, Custom); 283 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 284 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 285 setOperationAction(ISD::BlockAddress, MVT::i64, Custom); 286 setOperationAction(ISD::ConstantPool, MVT::i64, Custom); 287 setOperationAction(ISD::JumpTable, MVT::i64, Custom); 288 289 // TRAP is legal. 290 setOperationAction(ISD::TRAP, MVT::Other, Legal); 291 292 // TRAMPOLINE is custom lowered. 293 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); 294 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); 295 296 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 297 setOperationAction(ISD::VASTART , MVT::Other, Custom); 298 299 if (Subtarget.isSVR4ABI()) { 300 if (isPPC64) { 301 // VAARG always uses double-word chunks, so promote anything smaller. 
302 setOperationAction(ISD::VAARG, MVT::i1, Promote); 303 AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); 304 setOperationAction(ISD::VAARG, MVT::i8, Promote); 305 AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); 306 setOperationAction(ISD::VAARG, MVT::i16, Promote); 307 AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); 308 setOperationAction(ISD::VAARG, MVT::i32, Promote); 309 AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); 310 setOperationAction(ISD::VAARG, MVT::Other, Expand); 311 } else { 312 // VAARG is custom lowered with the 32-bit SVR4 ABI. 313 setOperationAction(ISD::VAARG, MVT::Other, Custom); 314 setOperationAction(ISD::VAARG, MVT::i64, Custom); 315 } 316 } else 317 setOperationAction(ISD::VAARG, MVT::Other, Expand); 318 319 if (Subtarget.isSVR4ABI() && !isPPC64) 320 // VACOPY is custom lowered with the 32-bit SVR4 ABI. 321 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 322 else 323 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 324 325 // Use the default implementation. 326 setOperationAction(ISD::VAEND , MVT::Other, Expand); 327 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 328 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); 329 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 330 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); 331 332 // We want to custom lower some of our intrinsics. 333 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 334 335 // To handle counter-based loop conditions. 336 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); 337 338 // Comparisons that require checking two conditions. 
339 setCondCodeAction(ISD::SETULT, MVT::f32, Expand); 340 setCondCodeAction(ISD::SETULT, MVT::f64, Expand); 341 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 342 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 343 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); 344 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); 345 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 346 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 347 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); 348 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); 349 setCondCodeAction(ISD::SETONE, MVT::f32, Expand); 350 setCondCodeAction(ISD::SETONE, MVT::f64, Expand); 351 352 if (Subtarget.has64BitSupport()) { 353 // They also have instructions for converting between i64 and fp. 354 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 355 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 356 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 357 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 358 // This is just the low 32 bits of a (signed) fp->i64 conversion. 359 // We cannot do this with Promote because i64 is not a legal type. 360 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 361 362 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) 363 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 364 } else { 365 // PowerPC does not have FP_TO_UINT on 32-bit implementations. 366 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 367 } 368 369 // With the instructions enabled under FPCVT, we can do everything. 
370 if (Subtarget.hasFPCVT()) { 371 if (Subtarget.has64BitSupport()) { 372 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 373 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 374 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 375 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 376 } 377 378 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 379 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 380 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 381 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 382 } 383 384 if (Subtarget.use64BitRegs()) { 385 // 64-bit PowerPC implementations can support i64 types directly 386 addRegisterClass(MVT::i64, &PPC::G8RCRegClass); 387 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 388 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 389 // 64-bit PowerPC wants to expand i128 shifts itself. 390 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); 391 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); 392 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); 393 } else { 394 // 32-bit PowerPC wants to expand i64 shifts itself. 395 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 396 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 397 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 398 } 399 400 if (Subtarget.hasAltivec()) { 401 // First set operation action for all vector types to expand. Then we 402 // will selectively turn on ones that can be effectively codegen'd. 403 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 404 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { 405 MVT::SimpleValueType VT = (MVT::SimpleValueType)i; 406 407 // add/sub are legal for all supported vector VT's. 408 setOperationAction(ISD::ADD , VT, Legal); 409 setOperationAction(ISD::SUB , VT, Legal); 410 411 // We promote all shuffles to v16i8. 
412 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); 413 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); 414 415 // We promote all non-typed operations to v4i32. 416 setOperationAction(ISD::AND , VT, Promote); 417 AddPromotedToType (ISD::AND , VT, MVT::v4i32); 418 setOperationAction(ISD::OR , VT, Promote); 419 AddPromotedToType (ISD::OR , VT, MVT::v4i32); 420 setOperationAction(ISD::XOR , VT, Promote); 421 AddPromotedToType (ISD::XOR , VT, MVT::v4i32); 422 setOperationAction(ISD::LOAD , VT, Promote); 423 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); 424 setOperationAction(ISD::SELECT, VT, Promote); 425 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); 426 setOperationAction(ISD::STORE, VT, Promote); 427 AddPromotedToType (ISD::STORE, VT, MVT::v4i32); 428 429 // No other operations are legal. 430 setOperationAction(ISD::MUL , VT, Expand); 431 setOperationAction(ISD::SDIV, VT, Expand); 432 setOperationAction(ISD::SREM, VT, Expand); 433 setOperationAction(ISD::UDIV, VT, Expand); 434 setOperationAction(ISD::UREM, VT, Expand); 435 setOperationAction(ISD::FDIV, VT, Expand); 436 setOperationAction(ISD::FREM, VT, Expand); 437 setOperationAction(ISD::FNEG, VT, Expand); 438 setOperationAction(ISD::FSQRT, VT, Expand); 439 setOperationAction(ISD::FLOG, VT, Expand); 440 setOperationAction(ISD::FLOG10, VT, Expand); 441 setOperationAction(ISD::FLOG2, VT, Expand); 442 setOperationAction(ISD::FEXP, VT, Expand); 443 setOperationAction(ISD::FEXP2, VT, Expand); 444 setOperationAction(ISD::FSIN, VT, Expand); 445 setOperationAction(ISD::FCOS, VT, Expand); 446 setOperationAction(ISD::FABS, VT, Expand); 447 setOperationAction(ISD::FPOWI, VT, Expand); 448 setOperationAction(ISD::FFLOOR, VT, Expand); 449 setOperationAction(ISD::FCEIL, VT, Expand); 450 setOperationAction(ISD::FTRUNC, VT, Expand); 451 setOperationAction(ISD::FRINT, VT, Expand); 452 setOperationAction(ISD::FNEARBYINT, VT, Expand); 453 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); 454 
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); 455 setOperationAction(ISD::BUILD_VECTOR, VT, Expand); 456 setOperationAction(ISD::MULHU, VT, Expand); 457 setOperationAction(ISD::MULHS, VT, Expand); 458 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 459 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 460 setOperationAction(ISD::UDIVREM, VT, Expand); 461 setOperationAction(ISD::SDIVREM, VT, Expand); 462 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); 463 setOperationAction(ISD::FPOW, VT, Expand); 464 setOperationAction(ISD::BSWAP, VT, Expand); 465 setOperationAction(ISD::CTPOP, VT, Expand); 466 setOperationAction(ISD::CTLZ, VT, Expand); 467 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); 468 setOperationAction(ISD::CTTZ, VT, Expand); 469 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); 470 setOperationAction(ISD::VSELECT, VT, Expand); 471 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 472 473 for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 474 j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) { 475 MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j; 476 setTruncStoreAction(VT, InnerVT, Expand); 477 } 478 setLoadExtAction(ISD::SEXTLOAD, VT, Expand); 479 setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); 480 setLoadExtAction(ISD::EXTLOAD, VT, Expand); 481 } 482 483 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 484 // with merges, splats, etc. 485 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 486 487 setOperationAction(ISD::AND , MVT::v4i32, Legal); 488 setOperationAction(ISD::OR , MVT::v4i32, Legal); 489 setOperationAction(ISD::XOR , MVT::v4i32, Legal); 490 setOperationAction(ISD::LOAD , MVT::v4i32, Legal); 491 setOperationAction(ISD::SELECT, MVT::v4i32, 492 Subtarget.useCRBits() ? 
Legal : Expand); 493 setOperationAction(ISD::STORE , MVT::v4i32, Legal); 494 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 495 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 496 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 497 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 498 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 499 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 500 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 501 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 502 503 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); 504 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); 505 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); 506 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); 507 508 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 509 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 510 511 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { 512 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 513 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 514 } 515 516 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 517 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 518 setOperationAction(ISD::MUL, MVT::v16i8, Custom); 519 520 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 521 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 522 523 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 524 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 525 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 526 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 527 528 // Altivec does not contain unordered floating-point compare instructions 529 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); 530 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); 531 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); 532 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); 533 534 if (Subtarget.hasVSX()) { 535 setOperationAction(ISD::SCALAR_TO_VECTOR, 
MVT::v2f64, Legal); 536 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); 537 538 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 539 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 540 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 541 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 542 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 543 544 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 545 546 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 547 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 548 549 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 550 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 551 552 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); 553 setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); 554 setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); 555 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); 556 setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); 557 558 // Share the Altivec comparison restrictions. 559 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); 560 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); 561 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); 562 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); 563 564 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 565 setOperationAction(ISD::STORE, MVT::v2f64, Legal); 566 567 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); 568 569 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); 570 571 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); 572 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); 573 574 // VSX v2i64 only supports non-arithmetic operations. 
575 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 576 setOperationAction(ISD::SUB, MVT::v2i64, Expand); 577 578 setOperationAction(ISD::SHL, MVT::v2i64, Expand); 579 setOperationAction(ISD::SRA, MVT::v2i64, Expand); 580 setOperationAction(ISD::SRL, MVT::v2i64, Expand); 581 582 setOperationAction(ISD::SETCC, MVT::v2i64, Custom); 583 584 setOperationAction(ISD::LOAD, MVT::v2i64, Promote); 585 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); 586 setOperationAction(ISD::STORE, MVT::v2i64, Promote); 587 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); 588 589 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); 590 591 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 592 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 593 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 594 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 595 596 // Vector operation legalization checks the result type of 597 // SIGN_EXTEND_INREG, overall legalization checks the inner type. 598 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); 599 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); 600 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); 601 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); 602 603 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); 604 } 605 } 606 607 if (Subtarget.has64BitSupport()) { 608 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 609 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); 610 } 611 612 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); 613 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); 614 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); 615 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); 616 617 setBooleanContents(ZeroOrOneBooleanContent); 618 // Altivec instructions set fields to all zeros or all ones. 
619 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 620 621 if (!isPPC64) { 622 // These libcalls are not available in 32-bit. 623 setLibcallName(RTLIB::SHL_I128, nullptr); 624 setLibcallName(RTLIB::SRL_I128, nullptr); 625 setLibcallName(RTLIB::SRA_I128, nullptr); 626 } 627 628 if (isPPC64) { 629 setStackPointerRegisterToSaveRestore(PPC::X1); 630 setExceptionPointerRegister(PPC::X3); 631 setExceptionSelectorRegister(PPC::X4); 632 } else { 633 setStackPointerRegisterToSaveRestore(PPC::R1); 634 setExceptionPointerRegister(PPC::R3); 635 setExceptionSelectorRegister(PPC::R4); 636 } 637 638 // We have target-specific dag combine patterns for the following nodes: 639 setTargetDAGCombine(ISD::SINT_TO_FP); 640 setTargetDAGCombine(ISD::LOAD); 641 setTargetDAGCombine(ISD::STORE); 642 setTargetDAGCombine(ISD::BR_CC); 643 if (Subtarget.useCRBits()) 644 setTargetDAGCombine(ISD::BRCOND); 645 setTargetDAGCombine(ISD::BSWAP); 646 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 647 648 setTargetDAGCombine(ISD::SIGN_EXTEND); 649 setTargetDAGCombine(ISD::ZERO_EXTEND); 650 setTargetDAGCombine(ISD::ANY_EXTEND); 651 652 if (Subtarget.useCRBits()) { 653 setTargetDAGCombine(ISD::TRUNCATE); 654 setTargetDAGCombine(ISD::SETCC); 655 setTargetDAGCombine(ISD::SELECT_CC); 656 } 657 658 // Use reciprocal estimates. 659 if (TM.Options.UnsafeFPMath) { 660 setTargetDAGCombine(ISD::FDIV); 661 setTargetDAGCombine(ISD::FSQRT); 662 } 663 664 // Darwin long double math library functions have $LDBL128 appended. 
665 if (Subtarget.isDarwin()) { 666 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); 667 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); 668 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); 669 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); 670 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); 671 setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); 672 setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); 673 setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); 674 setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); 675 setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); 676 } 677 678 // With 32 condition bits, we don't need to sink (and duplicate) compares 679 // aggressively in CodeGenPrep. 680 if (Subtarget.useCRBits()) 681 setHasMultipleConditionRegisters(); 682 683 setMinFunctionAlignment(2); 684 if (Subtarget.isDarwin()) 685 setPrefFunctionAlignment(4); 686 687 if (isPPC64 && Subtarget.isJITCodeModel()) 688 // Temporary workaround for the inability of PPC64 JIT to handle jump 689 // tables. 690 setSupportJumpTables(false); 691 692 setInsertFencesForAtomic(true); 693 694 if (Subtarget.enableMachineScheduler()) 695 setSchedulingPreference(Sched::Source); 696 else 697 setSchedulingPreference(Sched::Hybrid); 698 699 computeRegisterProperties(); 700 701 // The Freescale cores does better with aggressive inlining of memcpy and 702 // friends. Gcc uses same threshold of 128 bytes (= 32 word stores). 703 if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || 704 Subtarget.getDarwinDirective() == PPC::DIR_E5500) { 705 MaxStoresPerMemset = 32; 706 MaxStoresPerMemsetOptSize = 16; 707 MaxStoresPerMemcpy = 32; 708 MaxStoresPerMemcpyOptSize = 8; 709 MaxStoresPerMemmove = 32; 710 MaxStoresPerMemmoveOptSize = 8; 711 712 setPrefFunctionAlignment(4); 713 } 714 } 715 716 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine 717 /// the desired ByVal argument alignment. 
718 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, 719 unsigned MaxMaxAlign) { 720 if (MaxAlign == MaxMaxAlign) 721 return; 722 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { 723 if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) 724 MaxAlign = 32; 725 else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) 726 MaxAlign = 16; 727 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 728 unsigned EltAlign = 0; 729 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); 730 if (EltAlign > MaxAlign) 731 MaxAlign = EltAlign; 732 } else if (StructType *STy = dyn_cast<StructType>(Ty)) { 733 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 734 unsigned EltAlign = 0; 735 getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); 736 if (EltAlign > MaxAlign) 737 MaxAlign = EltAlign; 738 if (MaxAlign == MaxMaxAlign) 739 break; 740 } 741 } 742 } 743 744 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 745 /// function arguments in the caller parameter area. 746 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { 747 // Darwin passes everything on 4 byte boundary. 748 if (Subtarget.isDarwin()) 749 return 4; 750 751 // 16byte and wider vectors are passed on 16byte boundary. 752 // The rest is 8 on PPC64 and 4 on PPC32 boundary. 753 unsigned Align = Subtarget.isPPC64() ? 8 : 4; 754 if (Subtarget.hasAltivec() || Subtarget.hasQPX()) 755 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 
32 : 16); 756 return Align; 757 } 758 759 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 760 switch (Opcode) { 761 default: return nullptr; 762 case PPCISD::FSEL: return "PPCISD::FSEL"; 763 case PPCISD::FCFID: return "PPCISD::FCFID"; 764 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 765 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 766 case PPCISD::FRE: return "PPCISD::FRE"; 767 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; 768 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 769 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 770 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 771 case PPCISD::VPERM: return "PPCISD::VPERM"; 772 case PPCISD::Hi: return "PPCISD::Hi"; 773 case PPCISD::Lo: return "PPCISD::Lo"; 774 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; 775 case PPCISD::LOAD: return "PPCISD::LOAD"; 776 case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC"; 777 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; 778 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 779 case PPCISD::SRL: return "PPCISD::SRL"; 780 case PPCISD::SRA: return "PPCISD::SRA"; 781 case PPCISD::SHL: return "PPCISD::SHL"; 782 case PPCISD::CALL: return "PPCISD::CALL"; 783 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; 784 case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS"; 785 case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; 786 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 787 case PPCISD::BCTRL: return "PPCISD::BCTRL"; 788 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 789 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; 790 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; 791 case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; 792 case PPCISD::VCMP: return "PPCISD::VCMP"; 793 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 794 case PPCISD::LBRX: return "PPCISD::LBRX"; 795 case PPCISD::STBRX: return "PPCISD::STBRX"; 796 case PPCISD::LARX: return "PPCISD::LARX"; 797 case PPCISD::STCX: return "PPCISD::STCX"; 798 case 
PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; 799 case PPCISD::BDNZ: return "PPCISD::BDNZ"; 800 case PPCISD::BDZ: return "PPCISD::BDZ"; 801 case PPCISD::MFFS: return "PPCISD::MFFS"; 802 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; 803 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; 804 case PPCISD::CR6SET: return "PPCISD::CR6SET"; 805 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; 806 case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; 807 case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; 808 case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; 809 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; 810 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; 811 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; 812 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; 813 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; 814 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; 815 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; 816 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; 817 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; 818 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 819 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; 820 case PPCISD::SC: return "PPCISD::SC"; 821 } 822 } 823 824 EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { 825 if (!VT.isVector()) 826 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; 827 return VT.changeVectorElementTypeToInteger(); 828 } 829 830 //===----------------------------------------------------------------------===// 831 // Node matching predicates, for use by the tblgen matching code. 832 //===----------------------------------------------------------------------===// 833 834 /// isFloatingPointZero - Return true if this is 0.0 or -0.0. 
835 static bool isFloatingPointZero(SDValue Op) { 836 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 837 return CFP->getValueAPF().isZero(); 838 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 839 // Maybe this has already been legalized into the constant pool? 840 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 841 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 842 return CFP->getValueAPF().isZero(); 843 } 844 return false; 845 } 846 847 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 848 /// true if Op is undef or if it matches the specified value. 849 static bool isConstantOrUndef(int Op, int Val) { 850 return Op < 0 || Op == Val; 851 } 852 853 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 854 /// VPKUHUM instruction. 855 /// The ShuffleKind distinguishes between big-endian operations with 856 /// two different inputs (0), either-endian operations with two identical 857 /// inputs (1), and little-endian operantion with two different inputs (2). 858 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 859 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 860 SelectionDAG &DAG) { 861 if (ShuffleKind == 0) { 862 if (DAG.getTarget().getDataLayout()->isLittleEndian()) 863 return false; 864 for (unsigned i = 0; i != 16; ++i) 865 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 866 return false; 867 } else if (ShuffleKind == 2) { 868 if (!DAG.getTarget().getDataLayout()->isLittleEndian()) 869 return false; 870 for (unsigned i = 0; i != 16; ++i) 871 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) 872 return false; 873 } else if (ShuffleKind == 1) { 874 unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 
0 : 1; 875 for (unsigned i = 0; i != 8; ++i) 876 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || 877 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) 878 return false; 879 } 880 return true; 881 } 882 883 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 884 /// VPKUWUM instruction. 885 /// The ShuffleKind distinguishes between big-endian operations with 886 /// two different inputs (0), either-endian operations with two identical 887 /// inputs (1), and little-endian operantion with two different inputs (2). 888 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 889 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 890 SelectionDAG &DAG) { 891 if (ShuffleKind == 0) { 892 if (DAG.getTarget().getDataLayout()->isLittleEndian()) 893 return false; 894 for (unsigned i = 0; i != 16; i += 2) 895 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 896 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) 897 return false; 898 } else if (ShuffleKind == 2) { 899 if (!DAG.getTarget().getDataLayout()->isLittleEndian()) 900 return false; 901 for (unsigned i = 0; i != 16; i += 2) 902 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || 903 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) 904 return false; 905 } else if (ShuffleKind == 1) { 906 unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 2; 907 for (unsigned i = 0; i != 8; i += 2) 908 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || 909 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || 910 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || 911 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) 912 return false; 913 } 914 return true; 915 } 916 917 /// isVMerge - Common function, used to match vmrg* shuffles. 
918 /// 919 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, 920 unsigned LHSStart, unsigned RHSStart) { 921 if (N->getValueType(0) != MVT::v16i8) 922 return false; 923 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 924 "Unsupported merge size!"); 925 926 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 927 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 928 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), 929 LHSStart+j+i*UnitSize) || 930 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), 931 RHSStart+j+i*UnitSize)) 932 return false; 933 } 934 return true; 935 } 936 937 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 938 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). 939 /// The ShuffleKind distinguishes between big-endian merges with two 940 /// different inputs (0), either-endian merges with two identical inputs (1), 941 /// and little-endian merges with two different inputs (2). For the latter, 942 /// the input operands are swapped (see PPCInstrAltivec.td). 943 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 944 unsigned ShuffleKind, SelectionDAG &DAG) { 945 if (DAG.getTarget().getDataLayout()->isLittleEndian()) { 946 if (ShuffleKind == 1) // unary 947 return isVMerge(N, UnitSize, 0, 0); 948 else if (ShuffleKind == 2) // swapped 949 return isVMerge(N, UnitSize, 0, 16); 950 else 951 return false; 952 } else { 953 if (ShuffleKind == 1) // unary 954 return isVMerge(N, UnitSize, 8, 8); 955 else if (ShuffleKind == 0) // normal 956 return isVMerge(N, UnitSize, 8, 24); 957 else 958 return false; 959 } 960 } 961 962 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 963 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). 
964 /// The ShuffleKind distinguishes between big-endian merges with two 965 /// different inputs (0), either-endian merges with two identical inputs (1), 966 /// and little-endian merges with two different inputs (2). For the latter, 967 /// the input operands are swapped (see PPCInstrAltivec.td). 968 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 969 unsigned ShuffleKind, SelectionDAG &DAG) { 970 if (DAG.getTarget().getDataLayout()->isLittleEndian()) { 971 if (ShuffleKind == 1) // unary 972 return isVMerge(N, UnitSize, 8, 8); 973 else if (ShuffleKind == 2) // swapped 974 return isVMerge(N, UnitSize, 8, 24); 975 else 976 return false; 977 } else { 978 if (ShuffleKind == 1) // unary 979 return isVMerge(N, UnitSize, 0, 0); 980 else if (ShuffleKind == 0) // normal 981 return isVMerge(N, UnitSize, 0, 16); 982 else 983 return false; 984 } 985 } 986 987 988 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 989 /// amount, otherwise return -1. 990 /// The ShuffleKind distinguishes between big-endian operations with two 991 /// different inputs (0), either-endian operations with two identical inputs 992 /// (1), and little-endian operations with two different inputs (2). For the 993 /// latter, the input operands are swapped (see PPCInstrAltivec.td). 994 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, 995 SelectionDAG &DAG) { 996 if (N->getValueType(0) != MVT::v16i8) 997 return -1; 998 999 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1000 1001 // Find the first non-undef value in the shuffle mask. 1002 unsigned i; 1003 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) 1004 /*search*/; 1005 1006 if (i == 16) return -1; // all undef. 1007 1008 // Otherwise, check to see if the rest of the elements are consecutively 1009 // numbered from this value. 
1010 unsigned ShiftAmt = SVOp->getMaskElt(i); 1011 if (ShiftAmt < i) return -1; 1012 1013 ShiftAmt -= i; 1014 bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian(); 1015 1016 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { 1017 // Check the rest of the elements to see if they are consecutive. 1018 for (++i; i != 16; ++i) 1019 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 1020 return -1; 1021 } else if (ShuffleKind == 1) { 1022 // Check the rest of the elements to see if they are consecutive. 1023 for (++i; i != 16; ++i) 1024 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 1025 return -1; 1026 } else 1027 return -1; 1028 1029 if (ShuffleKind == 2 && isLE) 1030 ShiftAmt = 16 - ShiftAmt; 1031 1032 return ShiftAmt; 1033 } 1034 1035 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 1036 /// specifies a splat of a single element that is suitable for input to 1037 /// VSPLTB/VSPLTH/VSPLTW. 1038 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 1039 assert(N->getValueType(0) == MVT::v16i8 && 1040 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 1041 1042 // This is a splat operation if each element of the permute is the same, and 1043 // if the value doesn't reference the second vector. 1044 unsigned ElementBase = N->getMaskElt(0); 1045 1046 // FIXME: Handle UNDEF elements too! 1047 if (ElementBase >= 16) 1048 return false; 1049 1050 // Check that the indices are consecutive, in the case of a multi-byte element 1051 // splatted with a v16i8 mask. 
1052 for (unsigned i = 1; i != EltSize; ++i) 1053 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 1054 return false; 1055 1056 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 1057 if (N->getMaskElt(i) < 0) continue; 1058 for (unsigned j = 0; j != EltSize; ++j) 1059 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 1060 return false; 1061 } 1062 return true; 1063 } 1064 1065 /// isAllNegativeZeroVector - Returns true if all elements of build_vector 1066 /// are -0.0. 1067 bool PPC::isAllNegativeZeroVector(SDNode *N) { 1068 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); 1069 1070 APInt APVal, APUndef; 1071 unsigned BitSize; 1072 bool HasAnyUndefs; 1073 1074 if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) 1075 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 1076 return CFP->getValueAPF().isNegZero(); 1077 1078 return false; 1079 } 1080 1081 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 1082 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 1083 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, 1084 SelectionDAG &DAG) { 1085 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 1086 assert(isSplatShuffleMask(SVOp, EltSize)); 1087 if (DAG.getTarget().getDataLayout()->isLittleEndian()) 1088 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); 1089 else 1090 return SVOp->getMaskElt(0) / EltSize; 1091 } 1092 1093 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 1094 /// by using a vspltis[bhw] instruction of the specified element size, return 1095 /// the constant being splatted. The ByteSize field indicates the number of 1096 /// bytes of each element [124] -> [bhw]. 
1097 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 1098 SDValue OpVal(nullptr, 0); 1099 1100 // If ByteSize of the splat is bigger than the element size of the 1101 // build_vector, then we have a case where we are checking for a splat where 1102 // multiple elements of the buildvector are folded together into a single 1103 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 1104 unsigned EltSize = 16/N->getNumOperands(); 1105 if (EltSize < ByteSize) { 1106 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 1107 SDValue UniquedVals[4]; 1108 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 1109 1110 // See if all of the elements in the buildvector agree across. 1111 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1112 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1113 // If the element isn't a constant, bail fully out. 1114 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 1115 1116 1117 if (!UniquedVals[i&(Multiple-1)].getNode()) 1118 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 1119 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 1120 return SDValue(); // no match. 1121 } 1122 1123 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 1124 // either constant or undef values that are identical for each chunk. See 1125 // if these chunks can form into a larger vspltis*. 1126 1127 // Check to see if all of the leading entries are either 0 or -1. If 1128 // neither, then this won't fit into the immediate field. 1129 bool LeadingZero = true; 1130 bool LeadingOnes = true; 1131 for (unsigned i = 0; i != Multiple-1; ++i) { 1132 if (!UniquedVals[i].getNode()) continue; // Must have been undefs. 1133 1134 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); 1135 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); 1136 } 1137 // Finally, check the least significant entry. 
1138 if (LeadingZero) { 1139 if (!UniquedVals[Multiple-1].getNode()) 1140 return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef 1141 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 1142 if (Val < 16) 1143 return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) 1144 } 1145 if (LeadingOnes) { 1146 if (!UniquedVals[Multiple-1].getNode()) 1147 return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef 1148 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 1149 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 1150 return DAG.getTargetConstant(Val, MVT::i32); 1151 } 1152 1153 return SDValue(); 1154 } 1155 1156 // Check to see if this buildvec has a single non-undef value in its elements. 1157 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1158 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1159 if (!OpVal.getNode()) 1160 OpVal = N->getOperand(i); 1161 else if (OpVal != N->getOperand(i)) 1162 return SDValue(); 1163 } 1164 1165 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. 1166 1167 unsigned ValSizeInBytes = EltSize; 1168 uint64_t Value = 0; 1169 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1170 Value = CN->getZExtValue(); 1171 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1172 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 1173 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 1174 } 1175 1176 // If the splat value is larger than the element value, then we can never do 1177 // this splat. The only case that we could fit the replicated bits into our 1178 // immediate field for would be zero, and we prefer to use vxor for it. 1179 if (ValSizeInBytes < ByteSize) return SDValue(); 1180 1181 // If the element value is larger than the splat value, cut it in half and 1182 // check to see if the two halves are equal. Continue doing this until we 1183 // get to ByteSize. 
This allows us to handle 0x01010101 as 0x01. 1184 while (ValSizeInBytes > ByteSize) { 1185 ValSizeInBytes >>= 1; 1186 1187 // If the top half equals the bottom half, we're still ok. 1188 if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != 1189 (Value & ((1 << (8*ValSizeInBytes))-1))) 1190 return SDValue(); 1191 } 1192 1193 // Properly sign extend the value. 1194 int MaskVal = SignExtend32(Value, ByteSize * 8); 1195 1196 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 1197 if (MaskVal == 0) return SDValue(); 1198 1199 // Finally, if this value fits in a 5 bit sext field, return it 1200 if (SignExtend32<5>(MaskVal) == MaskVal) 1201 return DAG.getTargetConstant(MaskVal, MVT::i32); 1202 return SDValue(); 1203 } 1204 1205 //===----------------------------------------------------------------------===// 1206 // Addressing Mode Selection 1207 //===----------------------------------------------------------------------===// 1208 1209 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit 1210 /// or 64-bit immediate, and if the value can be accurately represented as a 1211 /// sign extension from a 16-bit value. If so, this returns true and the 1212 /// immediate. 1213 static bool isIntS16Immediate(SDNode *N, short &Imm) { 1214 if (!isa<ConstantSDNode>(N)) 1215 return false; 1216 1217 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 1218 if (N->getValueType(0) == MVT::i32) 1219 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 1220 else 1221 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 1222 } 1223 static bool isIntS16Immediate(SDValue Op, short &Imm) { 1224 return isIntS16Immediate(Op.getNode(), Imm); 1225 } 1226 1227 1228 /// SelectAddressRegReg - Given the specified addressed, check to see if it 1229 /// can be represented as an indexed [r+r] operation. Returns false if it 1230 /// can be more efficiently represented with [r+imm]. 
1231 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 1232 SDValue &Index, 1233 SelectionDAG &DAG) const { 1234 short imm = 0; 1235 if (N.getOpcode() == ISD::ADD) { 1236 if (isIntS16Immediate(N.getOperand(1), imm)) 1237 return false; // r+i 1238 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 1239 return false; // r+i 1240 1241 Base = N.getOperand(0); 1242 Index = N.getOperand(1); 1243 return true; 1244 } else if (N.getOpcode() == ISD::OR) { 1245 if (isIntS16Immediate(N.getOperand(1), imm)) 1246 return false; // r+i can fold it if we can. 1247 1248 // If this is an or of disjoint bitfields, we can codegen this as an add 1249 // (for better address arithmetic) if the LHS and RHS of the OR are provably 1250 // disjoint. 1251 APInt LHSKnownZero, LHSKnownOne; 1252 APInt RHSKnownZero, RHSKnownOne; 1253 DAG.computeKnownBits(N.getOperand(0), 1254 LHSKnownZero, LHSKnownOne); 1255 1256 if (LHSKnownZero.getBoolValue()) { 1257 DAG.computeKnownBits(N.getOperand(1), 1258 RHSKnownZero, RHSKnownOne); 1259 // If all of the bits are known zero on the LHS or RHS, the add won't 1260 // carry. 1261 if (~(LHSKnownZero | RHSKnownZero) == 0) { 1262 Base = N.getOperand(0); 1263 Index = N.getOperand(1); 1264 return true; 1265 } 1266 } 1267 } 1268 1269 return false; 1270 } 1271 1272 // If we happen to be doing an i64 load or store into a stack slot that has 1273 // less than a 4-byte alignment, then the frame-index elimination may need to 1274 // use an indexed load or store instruction (because the offset may not be a 1275 // multiple of 4). The extra register needed to hold the offset comes from the 1276 // register scavenger, and it is possible that the scavenger will need to use 1277 // an emergency spill slot. As a result, we need to make sure that a spill slot 1278 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned 1279 // stack slot. 
// In short: for an i64 access to a non-negative frame index whose object
// alignment is below 4, record on the function info that it has non-r+i
// spills (setHasNonRISpills).  All other cases are left untouched.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Slots aligned to 4 bytes or more need no special treatment.
  unsigned Align = MFI->getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
///
/// The only way this returns false is when SelectAddressRegReg accepts the
/// address; note that in that case Disp and Base have already been written by
/// that call.  Every other path ends in one of the matched forms below or in
/// the final [r+0] fallback, all of which return true.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    // (add X, imm16): the immediate becomes the displacement, provided it
    // satisfies the multiple-of-4 requirement when Aligned is set.
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        // A frame-index base is rewritten into its target form.
        // NOTE(review): the type handed to fixupFuncForFI is the type of the
        // address node, not of the memory access - confirm this is intended.
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      // Every bit set in the sign-extended immediate must be known zero on
      // the left-hand side.
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      // The upper part is adjusted by the sign-extended low half, so that
      // adding the displacement to it gives back Addr.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: use the whole address as the base with a zero displacement.
  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
1414 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 1415 SDValue &Index, 1416 SelectionDAG &DAG) const { 1417 // Check to see if we can easily represent this as an [r+r] address. This 1418 // will fail if it thinks that the address is more profitably represented as 1419 // reg+imm, e.g. where imm = 0. 1420 if (SelectAddressRegReg(N, Base, Index, DAG)) 1421 return true; 1422 1423 // If the operand is an addition, always emit this as [r+r], since this is 1424 // better (for code size, and execution, as the memop does the add for free) 1425 // than emitting an explicit add. 1426 if (N.getOpcode() == ISD::ADD) { 1427 Base = N.getOperand(0); 1428 Index = N.getOperand(1); 1429 return true; 1430 } 1431 1432 // Otherwise, do it the hard way, using R0 as the base register. 1433 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, 1434 N.getValueType()); 1435 Index = N; 1436 return true; 1437 } 1438 1439 /// getPreIndexedAddressParts - returns true by value, base pointer and 1440 /// offset pointer and addressing mode by reference if the node's address 1441 /// can be legally represented as pre-indexed load / store address. 1442 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 1443 SDValue &Offset, 1444 ISD::MemIndexedMode &AM, 1445 SelectionDAG &DAG) const { 1446 if (DisablePPCPreinc) return false; 1447 1448 bool isLoad = true; 1449 SDValue Ptr; 1450 EVT VT; 1451 unsigned Alignment; 1452 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1453 Ptr = LD->getBasePtr(); 1454 VT = LD->getMemoryVT(); 1455 Alignment = LD->getAlignment(); 1456 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 1457 Ptr = ST->getBasePtr(); 1458 VT = ST->getMemoryVT(); 1459 Alignment = ST->getAlignment(); 1460 isLoad = false; 1461 } else 1462 return false; 1463 1464 // PowerPC doesn't have preinc load/store instructions for vectors. 
1465 if (VT.isVector()) 1466 return false; 1467 1468 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { 1469 1470 // Common code will reject creating a pre-inc form if the base pointer 1471 // is a frame index, or if N is a store and the base pointer is either 1472 // the same as or a predecessor of the value being stored. Check for 1473 // those situations here, and try with swapped Base/Offset instead. 1474 bool Swap = false; 1475 1476 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) 1477 Swap = true; 1478 else if (!isLoad) { 1479 SDValue Val = cast<StoreSDNode>(N)->getValue(); 1480 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) 1481 Swap = true; 1482 } 1483 1484 if (Swap) 1485 std::swap(Base, Offset); 1486 1487 AM = ISD::PRE_INC; 1488 return true; 1489 } 1490 1491 // LDU/STU can only handle immediates that are a multiple of 4. 1492 if (VT != MVT::i64) { 1493 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) 1494 return false; 1495 } else { 1496 // LDU/STU need an address with at least 4-byte alignment. 1497 if (Alignment < 4) 1498 return false; 1499 1500 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) 1501 return false; 1502 } 1503 1504 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1505 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 1506 // sext i32 to i64 when addr mode is r+i. 1507 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 1508 LD->getExtensionType() == ISD::SEXTLOAD && 1509 isa<ConstantSDNode>(Offset)) 1510 return false; 1511 } 1512 1513 AM = ISD::PRE_INC; 1514 return true; 1515 } 1516 1517 //===----------------------------------------------------------------------===// 1518 // LowerOperation implementation 1519 //===----------------------------------------------------------------------===// 1520 1521 /// GetLabelAccessInfo - Return true if we should reference labels using a 1522 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. 
1523 static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, 1524 unsigned &LoOpFlags, 1525 const GlobalValue *GV = nullptr) { 1526 HiOpFlags = PPCII::MO_HA; 1527 LoOpFlags = PPCII::MO_LO; 1528 1529 // Don't use the pic base if not in PIC relocation model. 1530 bool isPIC = TM.getRelocationModel() == Reloc::PIC_; 1531 1532 if (isPIC) { 1533 HiOpFlags |= PPCII::MO_PIC_FLAG; 1534 LoOpFlags |= PPCII::MO_PIC_FLAG; 1535 } 1536 1537 // If this is a reference to a global value that requires a non-lazy-ptr, make 1538 // sure that instruction lowering adds it. 1539 if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) { 1540 HiOpFlags |= PPCII::MO_NLP_FLAG; 1541 LoOpFlags |= PPCII::MO_NLP_FLAG; 1542 1543 if (GV->hasHiddenVisibility()) { 1544 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1545 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1546 } 1547 } 1548 1549 return isPIC; 1550 } 1551 1552 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 1553 SelectionDAG &DAG) { 1554 EVT PtrVT = HiPart.getValueType(); 1555 SDValue Zero = DAG.getConstant(0, PtrVT); 1556 SDLoc DL(HiPart); 1557 1558 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 1559 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 1560 1561 // With PIC, the first instruction is actually "GR+hi(&G)". 1562 if (isPIC) 1563 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 1564 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 1565 1566 // Generate non-pic code that has direct accesses to the constant pool. 1567 // The address of the global is just (hi(&g)+lo(&g)). 1568 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 1569 } 1570 1571 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 1572 SelectionDAG &DAG) const { 1573 EVT PtrVT = Op.getValueType(); 1574 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1575 const Constant *C = CP->getConstVal(); 1576 1577 // 64-bit SVR4 ABI code is always position-independent. 
1578 // The actual address of the GlobalValue is stored in the TOC. 1579 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1580 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); 1581 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA, 1582 DAG.getRegister(PPC::X2, MVT::i64)); 1583 } 1584 1585 unsigned MOHiFlag, MOLoFlag; 1586 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1587 1588 if (isPIC && Subtarget.isSVR4ABI()) { 1589 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 1590 PPCII::MO_PIC_FLAG); 1591 SDLoc DL(CP); 1592 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, 1593 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); 1594 } 1595 1596 SDValue CPIHi = 1597 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 1598 SDValue CPILo = 1599 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 1600 return LowerLabelRef(CPIHi, CPILo, isPIC, DAG); 1601 } 1602 1603 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 1604 EVT PtrVT = Op.getValueType(); 1605 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1606 1607 // 64-bit SVR4 ABI code is always position-independent. 1608 // The actual address of the GlobalValue is stored in the TOC. 
1609 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1610 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 1611 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, 1612 DAG.getRegister(PPC::X2, MVT::i64)); 1613 } 1614 1615 unsigned MOHiFlag, MOLoFlag; 1616 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1617 1618 if (isPIC && Subtarget.isSVR4ABI()) { 1619 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 1620 PPCII::MO_PIC_FLAG); 1621 SDLoc DL(GA); 1622 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA, 1623 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); 1624 } 1625 1626 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 1627 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 1628 return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); 1629 } 1630 1631 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 1632 SelectionDAG &DAG) const { 1633 EVT PtrVT = Op.getValueType(); 1634 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); 1635 const BlockAddress *BA = BASDN->getBlockAddress(); 1636 1637 // 64-bit SVR4 ABI code is always position-independent. 1638 // The actual BlockAddress is stored in the TOC. 1639 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { 1640 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); 1641 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA, 1642 DAG.getRegister(PPC::X2, MVT::i64)); 1643 } 1644 1645 unsigned MOHiFlag, MOLoFlag; 1646 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1647 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 1648 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 1649 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); 1650 } 1651 1652 // Generate a call to __tls_get_addr for the given GOT entry Op. 
// The call takes Op as its only argument, typed as a pointer-sized integer,
// and is chained to the DAG entry node.  The pair returned by LowerCallTo is
// passed through unchanged; callers in this file read .first as the call
// result and .second as the output chain.
std::pair<SDValue,SDValue>
PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
                                SelectionDAG &DAG) const {

  Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Op;
  Entry.Ty = IntPtrTy;
  Args.push_back(Entry);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
    .setCallee(CallingConv::C, IntPtrTy,
               DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
               std::move(Args), 0);

  return LowerCallTo(CLI);
}

/// LowerGlobalTLSAddress - Lower the address of a thread-local global.  The
/// code sequence is chosen by the TLS model the target machine reports for the
/// global (local-exec, initial-exec, general-dynamic or local-dynamic); any
/// other model value is treated as unreachable.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {

  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy();
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
  // Only consulted on 32-bit targets for the two dynamic models below.
  PICLevel::Level picLevel = M->getPICLevel();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  // Local-exec: Lo(TPREL_LO sym, Hi(TPREL_HA sym, TLSReg)), where TLSReg is
  // X13 on 64-bit and R2 on 32-bit.
  if (Model == TLSModel::LocalExec) {
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                     is64bit ? MVT::i64 : MVT::i32);
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  // Initial-exec: form a GOT pointer (ADDIS_GOT_TPREL_HA off X2 on 64-bit,
  // PPC32_GOT on 32-bit), load through it with LD_GOT_TPREL_L, then combine
  // the loaded value with the MO_TLS-flagged symbol via ADD_TLS.
  if (Model == TLSModel::InitialExec) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  // General-dynamic: build the GOT entry address for the symbol and pass it
  // to __tls_get_addr; the call result is the variable's address.
  if (Model == TLSModel::GeneralDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                             PPCII::MO_TLSGD);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                   GOTReg, TGA);
    } else {
      // 32-bit: the base node depends on the module's PIC level.
      if (picLevel == PICLevel::Small)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
                                   GOTPtr, TGA);
    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
    return CallResult.first;
  }

  // Local-dynamic: same shape as general-dynamic up to the call, but the call
  // result is then adjusted by the symbol's DTPREL high and low parts.  The
  // call's output chain is fed into ADDIS_DTPREL_HA as its first operand.
  if (Model == TLSModel::LocalDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                             PPCII::MO_TLSLD);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::Small)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
                                   GOTPtr, TGA);
    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
    SDValue TLSAddr = CallResult.first;
    SDValue Chain = CallResult.second;
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
                                      Chain, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}

/// LowerGlobalAddress - Lower a GlobalAddress node.  Produces a TOC_ENTRY off
/// X2 on 64-bit SVR4, a TOC_ENTRY off the global base register on 32-bit SVR4
/// with PIC, and otherwise a hi/lo pair, followed by a load when the flags
/// from GetLabelAccessInfo include MO_NLP_FLAG.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);

  if (isPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
                       DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
  }

  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do an
  // extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
                      false, false, false, 0);
  return Ptr;
}

/// LowerSETCC - Custom lowering for SETCC.  Returns a replacement node, Op
/// itself when the node is left as it is, or an empty SDValue when no custom
/// lowering is done:
///  * v2i64 result with v2i64 operands: SETEQ/SETNE are done as a v4i32
///    compare bitcast back to v2i64; other predicates return SDValue().
///  * v2i64 result with other operand types: Op is returned unchanged.
///  * "x == 0": rewritten as ctlz(x) >> log2(bitwidth).
///  * other compares against 0 or -1: SDValue().
///  * remaining integer SETEQ/SETNE: rewritten as (lhs ^ rhs) ==/!= 0.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      // Operands narrower than i32 are zero-extended to i32 first.
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      // ctlz(x) equals the bit width only when x is zero, so shifting it
      // right by log2(bit width) leaves 1 for zero and 0 otherwise.
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDValue();
}

/// LowerVAARG - Lower VAARG for PPC32 (asserts that the subtarget is not
/// PPC64).  The va_list is accessed as: a byte at offset 0 (gpr index), a byte
/// at offset 1 (fpr index), a word at offset 4 (overflow area pointer) and a
/// word at offset 8 (register save area pointer).  The argument is loaded from
/// the register save area while the relevant index is below 8, otherwise from
/// the overflow area; the index and the overflow pointer are written back.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                                      const PPCSubtarget &Subtarget) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, 0);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, 0);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, MVT::i32));

  // areas
  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
                                     MachinePointerInfo(), false, false,
                                     false, 0);
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
                                    MachinePointerInfo(), false, false,
                                    false, 0);
  InChain = RegSaveArea.getValue(1);

  // CC is true while the index is below 8 (argument is in the register save
  // area); the overflow area is selected once the index is >= 8.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
                                                   MVT::i32));

  // Write the updated index back to the byte it was read from.
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV),
                              MVT::i8, false, false, 0);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr index >= 8 (i.e. CC is false)
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
                              OverflowAreaPtr,
                              MachinePointerInfo(),
                              MVT::i32, false, false, 0);

  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
                     false, false, false, 0);
}

/// LowerVACOPY - Lower VACOPY for PPC32 (asserts that the subtarget is not
/// PPC64) as a 12-byte memcpy built from operands 0, 1 and 2 of Op.
/// NOTE(review): operands 1 and 2 are presumably the destination and source
/// va_list pointers, and the trailing "8, false, true" the alignment,
/// volatile and always-inline arguments -- confirm against getMemcpy.
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
                                       const PPCSubtarget &Subtarget) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
  return DAG.getMemcpy(Op.getOperand(0), Op,
                       Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, MVT::i32), 8, false, true,
                       MachinePointerInfo(), MachinePointerInfo());
}

/// LowerADJUST_TRAMPOLINE - No adjustment is made: operand 0 of the node is
/// returned as-is.
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  return Op.getOperand(0);
}

/// LowerINIT_TRAMPOLINE - Lower INIT_TRAMPOLINE to a call to the external
/// function __trampoline_setup(Trmp, TrampSize, FPtr, Nest), where TrampSize
/// is 48 when the pointer type is i64 and 40 otherwise.  Returns the second
/// element of the LowerCallTo result (the call's output chain).
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy =
    DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
                                                             *DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  // Every argument is passed with the pointer-sized integer type; Entry is
  // reused and only its Node changes between pushes.
  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain)
    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
               DAG.getExternalSymbol("__trampoline_setup", PtrVT),
               std::move(Args), 0);

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}

/// LowerVASTART - Lower VASTART.  On Darwin and on PPC64 this is one store of
/// the VarArgsFrameIndex address into the va_list.  Otherwise (32-bit SVR4)
/// the four fields of the va_list struct described below are stored one after
/// another, each store chained to the previous one; the final store is
/// returned.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
                                        const PPCSubtarget &Subtarget) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  SDLoc dl(Op);

  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV),
                        false, false, 0);
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];


  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);


  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Distance between the two pointer fields: one pointer.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);

  // Distance from the fpr byte (offset 1) to the first pointer field: one
  // pointer minus one byte, which lands on offset PtrSize.
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);

  // Distance from the gpr byte to the fpr byte.
  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
                                         Op.getOperand(1),
                                         MachinePointerInfo(SV),
                                         MVT::i8, false, false, 0);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                      MachinePointerInfo(SV, nextOffset), MVT::i8,
                      false, false, 0);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore =
    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                 MachinePointerInfo(SV, nextOffset),
                 false, false, 0);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset),
                      false, false, 0);

}

#include "PPCGenCallingConv.inc"

// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
// Returns CC_PPC64_ELF_FIS for a non-zero Flag and RetCC_PPC64_ELF_FIS for
// zero.
CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
}

// Custom calling-convention hook that does nothing and always returns true.
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  return true;
}

// Custom calling-convention hook that pads the GPR argument sequence R3..R10:
// if the first unallocated register sits at an odd index in that list, it is
// allocated (and so skipped).  Never assigns the current argument itself.
bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                             MVT &LocVT,
                                             CCValAssign::LocInfo &LocInfo,
                                             ISD::ArgFlagsTy &ArgFlags,
                                             CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  const unsigned NumArgRegs = array_lengthof(ArgRegs);

  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);

  // Skip one register if the first unallocated register has an even register
  // number and there are still argument registers available which have not been
  // allocated yet. RegNum is actually an index into ArgRegs, which means we
  // need to skip a register if RegNum is odd.
  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
    State.AllocateReg(ArgRegs[RegNum]);
  }

  // Always return false here, as this function only makes sure that the first
  // unallocated register has an odd register number and does not actually
  // allocate a register for the current argument.
  return false;
}

// Custom calling-convention hook for the FPR argument sequence F1..F8: if F8
// is the first unallocated register, it is allocated (and so skipped).  Never
// assigns the current argument itself.
bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                               MVT &LocVT,
                                               CCValAssign::LocInfo &LocInfo,
                                               ISD::ArgFlagsTy &ArgFlags,
                                               CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8
  };

  const unsigned NumArgRegs = array_lengthof(ArgRegs);

  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);

  // If there is only one Floating-point register left we need to put both f64
  // values of a split ppc_fp128 value on the stack.
  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
    State.AllocateReg(ArgRegs[RegNum]);
  }

  // Always return false here, as this function only makes sure that the two f64
  // values a ppc_fp128 value is split into are both passed in registers or both
  // passed on the stack and does not actually allocate a register for the
  // current argument.
  return false;
}

/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.  The returned pointer refers to a function-local static array of
/// 13 entries, F1 through F13.
static const MCPhysReg *GetFPR() {
  static const MCPhysReg FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };

  return FPR;
}

/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
2212 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 2213 unsigned PtrByteSize) { 2214 unsigned ArgSize = ArgVT.getStoreSize(); 2215 if (Flags.isByVal()) 2216 ArgSize = Flags.getByValSize(); 2217 2218 // Round up to multiples of the pointer size, except for array members, 2219 // which are always packed. 2220 if (!Flags.isInConsecutiveRegs()) 2221 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2222 2223 return ArgSize; 2224 } 2225 2226 /// CalculateStackSlotAlignment - Calculates the alignment of this argument 2227 /// on the stack. 2228 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, 2229 ISD::ArgFlagsTy Flags, 2230 unsigned PtrByteSize) { 2231 unsigned Align = PtrByteSize; 2232 2233 // Altivec parameters are padded to a 16 byte boundary. 2234 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2235 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2236 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) 2237 Align = 16; 2238 2239 // ByVal parameters are aligned as requested. 2240 if (Flags.isByVal()) { 2241 unsigned BVAlign = Flags.getByValAlign(); 2242 if (BVAlign > PtrByteSize) { 2243 if (BVAlign % PtrByteSize != 0) 2244 llvm_unreachable( 2245 "ByVal alignment is not a multiple of the pointer size"); 2246 2247 Align = BVAlign; 2248 } 2249 } 2250 2251 // Array members are always packed to their original alignment. 2252 if (Flags.isInConsecutiveRegs()) { 2253 // If the array member was split into multiple registers, the first 2254 // needs to be aligned to the size of the full type. (Except for 2255 // ppcf128, which is only aligned as its f64 components.) 2256 if (Flags.isSplit() && OrigVT != MVT::ppcf128) 2257 Align = OrigVT.getStoreSize(); 2258 else 2259 Align = ArgVT.getStoreSize(); 2260 } 2261 2262 return Align; 2263 } 2264 2265 /// CalculateStackSlotUsed - Return whether this argument will use its 2266 /// stack slot (instead of being passed in registers). 
ArgOffset, 2267 /// AvailableFPRs, and AvailableVRs must hold the current argument 2268 /// position, and will be updated to account for this argument. 2269 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, 2270 ISD::ArgFlagsTy Flags, 2271 unsigned PtrByteSize, 2272 unsigned LinkageSize, 2273 unsigned ParamAreaSize, 2274 unsigned &ArgOffset, 2275 unsigned &AvailableFPRs, 2276 unsigned &AvailableVRs) { 2277 bool UseMemory = false; 2278 2279 // Respect alignment of argument on the stack. 2280 unsigned Align = 2281 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 2282 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 2283 // If there's no space left in the argument save area, we must 2284 // use memory (this check also catches zero-sized arguments). 2285 if (ArgOffset >= LinkageSize + ParamAreaSize) 2286 UseMemory = true; 2287 2288 // Allocate argument on the stack. 2289 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2290 if (Flags.isInConsecutiveRegsLast()) 2291 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2292 // If we overran the argument save area, we must use memory 2293 // (this check catches arguments passed partially in memory) 2294 if (ArgOffset > LinkageSize + ParamAreaSize) 2295 UseMemory = true; 2296 2297 // However, if the argument is actually passed in an FPR or a VR, 2298 // we don't use memory after all. 2299 if (!Flags.isByVal()) { 2300 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) 2301 if (AvailableFPRs > 0) { 2302 --AvailableFPRs; 2303 return false; 2304 } 2305 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 2306 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 2307 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) 2308 if (AvailableVRs > 0) { 2309 --AvailableVRs; 2310 return false; 2311 } 2312 } 2313 2314 return UseMemory; 2315 } 2316 2317 /// EnsureStackAlignment - Round stack frame size up from NumBytes to 2318 /// ensure minimum alignment required for target. 
2319 static unsigned EnsureStackAlignment(const TargetMachine &Target, 2320 unsigned NumBytes) { 2321 unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment(); 2322 unsigned AlignMask = TargetAlign - 1; 2323 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2324 return NumBytes; 2325 } 2326 2327 SDValue 2328 PPCTargetLowering::LowerFormalArguments(SDValue Chain, 2329 CallingConv::ID CallConv, bool isVarArg, 2330 const SmallVectorImpl<ISD::InputArg> 2331 &Ins, 2332 SDLoc dl, SelectionDAG &DAG, 2333 SmallVectorImpl<SDValue> &InVals) 2334 const { 2335 if (Subtarget.isSVR4ABI()) { 2336 if (Subtarget.isPPC64()) 2337 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 2338 dl, DAG, InVals); 2339 else 2340 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 2341 dl, DAG, InVals); 2342 } else { 2343 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 2344 dl, DAG, InVals); 2345 } 2346 } 2347 2348 SDValue 2349 PPCTargetLowering::LowerFormalArguments_32SVR4( 2350 SDValue Chain, 2351 CallingConv::ID CallConv, bool isVarArg, 2352 const SmallVectorImpl<ISD::InputArg> 2353 &Ins, 2354 SDLoc dl, SelectionDAG &DAG, 2355 SmallVectorImpl<SDValue> &InVals) const { 2356 2357 // 32-bit SVR4 ABI Stack Frame Layout: 2358 // +-----------------------------------+ 2359 // +--> | Back chain | 2360 // | +-----------------------------------+ 2361 // | | Floating-point register save area | 2362 // | +-----------------------------------+ 2363 // | | General register save area | 2364 // | +-----------------------------------+ 2365 // | | CR save word | 2366 // | +-----------------------------------+ 2367 // | | VRSAVE save word | 2368 // | +-----------------------------------+ 2369 // | | Alignment padding | 2370 // | +-----------------------------------+ 2371 // | | Vector register save area | 2372 // | +-----------------------------------+ 2373 // | | Local variable space | 2374 // | +-----------------------------------+ 2375 // | | 
Parameter list area | 2376 // | +-----------------------------------+ 2377 // | | LR save word | 2378 // | +-----------------------------------+ 2379 // SP--> +--- | Back chain | 2380 // +-----------------------------------+ 2381 // 2382 // Specifications: 2383 // System V Application Binary Interface PowerPC Processor Supplement 2384 // AltiVec Technology Programming Interface Manual 2385 2386 MachineFunction &MF = DAG.getMachineFunction(); 2387 MachineFrameInfo *MFI = MF.getFrameInfo(); 2388 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2389 2390 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2391 // Potential tail calls could cause overwriting of argument stack slots. 2392 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2393 (CallConv == CallingConv::Fast)); 2394 unsigned PtrByteSize = 4; 2395 2396 // Assign locations to all of the incoming arguments. 2397 SmallVector<CCValAssign, 16> ArgLocs; 2398 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2399 getTargetMachine(), ArgLocs, *DAG.getContext()); 2400 2401 // Reserve space for the linkage area on the stack. 2402 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false); 2403 CCInfo.AllocateStack(LinkageSize, PtrByteSize); 2404 2405 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 2406 2407 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2408 CCValAssign &VA = ArgLocs[i]; 2409 2410 // Arguments stored in registers. 
2411 if (VA.isRegLoc()) { 2412 const TargetRegisterClass *RC; 2413 EVT ValVT = VA.getValVT(); 2414 2415 switch (ValVT.getSimpleVT().SimpleTy) { 2416 default: 2417 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 2418 case MVT::i1: 2419 case MVT::i32: 2420 RC = &PPC::GPRCRegClass; 2421 break; 2422 case MVT::f32: 2423 RC = &PPC::F4RCRegClass; 2424 break; 2425 case MVT::f64: 2426 if (Subtarget.hasVSX()) 2427 RC = &PPC::VSFRCRegClass; 2428 else 2429 RC = &PPC::F8RCRegClass; 2430 break; 2431 case MVT::v16i8: 2432 case MVT::v8i16: 2433 case MVT::v4i32: 2434 case MVT::v4f32: 2435 RC = &PPC::VRRCRegClass; 2436 break; 2437 case MVT::v2f64: 2438 case MVT::v2i64: 2439 RC = &PPC::VSHRCRegClass; 2440 break; 2441 } 2442 2443 // Transform the arguments stored in physical registers into virtual ones. 2444 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2445 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, 2446 ValVT == MVT::i1 ? MVT::i32 : ValVT); 2447 2448 if (ValVT == MVT::i1) 2449 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); 2450 2451 InVals.push_back(ArgValue); 2452 } else { 2453 // Argument stored in memory. 2454 assert(VA.isMemLoc()); 2455 2456 unsigned ArgSize = VA.getLocVT().getStoreSize(); 2457 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 2458 isImmutable); 2459 2460 // Create load nodes to retrieve arguments from the stack. 2461 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2462 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2463 MachinePointerInfo(), 2464 false, false, false, 0)); 2465 } 2466 } 2467 2468 // Assign locations to all of the incoming aggregate by value arguments. 2469 // Aggregates passed by value are stored in the local variable space of the 2470 // caller's stack frame, right above the parameter list area. 
2471 SmallVector<CCValAssign, 16> ByValArgLocs; 2472 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2473 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 2474 2475 // Reserve stack space for the allocations in CCInfo. 2476 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 2477 2478 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 2479 2480 // Area that is at least reserved in the caller of this function. 2481 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 2482 MinReservedArea = std::max(MinReservedArea, LinkageSize); 2483 2484 // Set the size that is at least reserved in caller of this function. Tail 2485 // call optimized function's reserved stack space needs to be aligned so that 2486 // taking the difference between two stack areas will result in an aligned 2487 // stack. 2488 MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea); 2489 FuncInfo->setMinReservedArea(MinReservedArea); 2490 2491 SmallVector<SDValue, 8> MemOps; 2492 2493 // If the function takes variable number of arguments, make a frame index for 2494 // the start of the first vararg value... for expansion of llvm.va_start. 2495 if (isVarArg) { 2496 static const MCPhysReg GPArgRegs[] = { 2497 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2498 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2499 }; 2500 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 2501 2502 static const MCPhysReg FPArgRegs[] = { 2503 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2504 PPC::F8 2505 }; 2506 const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 2507 2508 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, 2509 NumGPArgRegs)); 2510 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, 2511 NumFPArgRegs)); 2512 2513 // Make room for NumGPArgRegs and NumFPArgRegs. 
2514 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 2515 NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; 2516 2517 FuncInfo->setVarArgsStackOffset( 2518 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2519 CCInfo.getNextStackOffset(), true)); 2520 2521 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 2522 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2523 2524 // The fixed integer arguments of a variadic function are stored to the 2525 // VarArgsFrameIndex on the stack so that they may be loaded by deferencing 2526 // the result of va_next. 2527 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 2528 // Get an existing live-in vreg, or add a new one. 2529 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 2530 if (!VReg) 2531 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 2532 2533 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2534 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2535 MachinePointerInfo(), false, false, 0); 2536 MemOps.push_back(Store); 2537 // Increment the address by four for the next argument to store 2538 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2539 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2540 } 2541 2542 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 2543 // is set. 2544 // The double arguments are stored to the VarArgsFrameIndex 2545 // on the stack. 2546 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 2547 // Get an existing live-in vreg, or add a new one. 
unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all register-spill stores together so they are ordered on the
  // returned chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// extendArgForPPC64 - PPC64 passes i8, i16, and i32 values in i64 registers.
/// Given the i64 register value \p ArgVal, attach an AssertSext/AssertZext
/// node (asserting \p ObjectVT) when \p Flags marks the argument as sign- or
/// zero-extended, then truncate the value to \p ObjectVT and return it.
SDValue
PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
                                     SelectionDAG &DAG, SDValue ArgVal,
                                     SDLoc dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  // With neither flag set the value is truncated without any assertion.
  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}

/// LowerFormalArguments_64SVR4 - Lower the incoming formal arguments described
/// by \p Ins using 8-byte pointers and the X3-X10 / FPR / V2-V13 argument
/// registers.  Exactly one SDValue is appended to \p InVals per entry of
/// \p Ins: a copy from a live-in argument register, a load from a fixed stack
/// slot, or (for byval aggregates) the frame-index address of the object.
/// The minimum reserved area is recorded in PPCFunctionInfo, and for vararg
/// functions the VarArgs frame index is created and the remaining GPRs are
/// stored to the stack.
///
/// \returns the incoming \p Chain, or a TokenFactor of all emitted stores when
/// any register had to be stored to memory.
SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;

  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
                                                          isELFv2ABI);

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const MCPhysReg *FPR = GetFPR();

  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  // Used below instead of VR for v2f64 / v2i64 arguments.
  static const MCPhysReg VSRH[] = {
    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i)
    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  // GPR_idx is recomputed from ArgOffset for every argument; only the FPR and
  // VR indices are running counters.
  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    // Keep FuncArg pointing at the IR argument this Ins entry came from.
    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
    CurArgIdx = Ins[ArgNo].OrigArgIndex;

    /* Respect alignment of argument on the stack.  */
    unsigned Align =
      CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
    unsigned CurArgOffset = ArgOffset;

    /* Compute GPR index associated with argument offset.  */
    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
    GPR_idx = std::min(GPR_idx, Num_GPR_Regs);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
      else
        FI = MFI->CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            // Sizes matching i8/i16/i32 are written with a truncating store
            // directly at the object's (possibly adjusted) address.
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(FuncArg),
                                      ObjType, false, false, 0);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(FuncArg),
                                 false, false, 0);
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(FuncArg, j),
                                     false, false, 0);
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
                                            &PPC::VSFRCRegClass :
                                            &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs) {
        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Depending on endianness and on which word of the doubleword the
          // float occupies, shift the upper 32 bits down before truncating.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
      ArgOffset += ArgSize;
      if (Flags.isInConsecutiveRegsLast())
        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
                        MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
                        MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        needsLoad = true;
      }
      ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // On big-endian, an object smaller than its slot is loaded from the
      // high-address end of the slot.
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI->CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by PtrByteSize (eight) for the next argument
      // to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all register-spill stores together so they are ordered on the
  // returned chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// LowerFormalArguments_Darwin - Lower the incoming formal arguments described
/// by \p Ins for the Darwin ABI, in either 32-bit or 64-bit mode (selected by
/// whether the pointer type is i64).  Exactly one SDValue is appended to
/// \p InVals per entry of \p Ins: a copy from a live-in argument register, a
/// load from a fixed stack slot, or (for byval aggregates) the frame-index
/// address of the object.  The minimum reserved area is recorded in
/// PPCFunctionInfo, and for vararg functions the VarArgs frame index is
/// created and the remaining GPRs are stored to the stack.
///
/// \returns the incoming \p Chain, or a TokenFactor of all emitted stores when
/// any register had to be stored to memory.
SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
                                                          false);
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const MCPhysReg *FPR = GetFPR();

  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    // Keep FuncArg pointing at the IR argument this Ins entry came from.
    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
    CurArgIdx = Ins[ArgNo].OrigArgIndex;

    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                            MachinePointerInfo(FuncArg),
                                            ObjType, false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(FuncArg, j),
                                       false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: skip ArgOffset past the remainder of the object in
          // one step.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      // FALLTHROUGH
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          // Pad ArgOffset up to a 16-byte boundary one pointer at a time,
          // consuming a GPR for each pointer-sized step while any remain.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // An object smaller than its slot is loaded from the high-address end
      // of the slot (offset advanced by ArgSize - ObjSize).
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                             Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size (four or eight bytes) for
      // the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all register-spill stores together so they are ordered on the
  // returned chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
3293 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 3294 unsigned ParamSize) { 3295 3296 if (!isTailCall) return 0; 3297 3298 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 3299 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 3300 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 3301 // Remember only if the new adjustement is bigger. 3302 if (SPDiff < FI->getTailCallSPDelta()) 3303 FI->setTailCallSPDelta(SPDiff); 3304 3305 return SPDiff; 3306 } 3307 3308 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 3309 /// for tail call optimization. Targets which want to do tail call 3310 /// optimization should implement this function. 3311 bool 3312 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 3313 CallingConv::ID CalleeCC, 3314 bool isVarArg, 3315 const SmallVectorImpl<ISD::InputArg> &Ins, 3316 SelectionDAG& DAG) const { 3317 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 3318 return false; 3319 3320 // Variable argument functions are not supported. 3321 if (isVarArg) 3322 return false; 3323 3324 MachineFunction &MF = DAG.getMachineFunction(); 3325 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 3326 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 3327 // Functions containing by val parameters are not supported. 3328 for (unsigned i = 0; i != Ins.size(); i++) { 3329 ISD::ArgFlagsTy Flags = Ins[i].Flags; 3330 if (Flags.isByVal()) return false; 3331 } 3332 3333 // Non-PIC/GOT tail calls are supported. 3334 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 3335 return true; 3336 3337 // At the moment we can only do local tail calls (in same module, hidden 3338 // or protected) if we are generating PIC. 
3339 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3340 return G->getGlobal()->hasHiddenVisibility() 3341 || G->getGlobal()->hasProtectedVisibility(); 3342 } 3343 3344 return false; 3345 } 3346 3347 /// isCallCompatibleAddress - Return the immediate to use if the specified 3348 /// 32-bit value is representable in the immediate field of a BxA instruction. 3349 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 3350 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 3351 if (!C) return nullptr; 3352 3353 int Addr = C->getZExtValue(); 3354 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 3355 SignExtend32<26>(Addr) != Addr) 3356 return nullptr; // Top 6 bits have to be sext of immediate. 3357 3358 return DAG.getConstant((int)C->getZExtValue() >> 2, 3359 DAG.getTargetLoweringInfo().getPointerTy()).getNode(); 3360 } 3361 3362 namespace { 3363 3364 struct TailCallArgumentInfo { 3365 SDValue Arg; 3366 SDValue FrameIdxOp; 3367 int FrameIdx; 3368 3369 TailCallArgumentInfo() : FrameIdx(0) {} 3370 }; 3371 3372 } 3373 3374 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 3375 static void 3376 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 3377 SDValue Chain, 3378 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 3379 SmallVectorImpl<SDValue> &MemOpChains, 3380 SDLoc dl) { 3381 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 3382 SDValue Arg = TailCallArgs[i].Arg; 3383 SDValue FIN = TailCallArgs[i].FrameIdxOp; 3384 int FI = TailCallArgs[i].FrameIdx; 3385 // Store relative to framepointer. 3386 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, 3387 MachinePointerInfo::getFixedStack(FI), 3388 false, false, 0)); 3389 } 3390 } 3391 3392 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 3393 /// the appropriate stack slot for the tail call optimized function call. 
3394 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 3395 MachineFunction &MF, 3396 SDValue Chain, 3397 SDValue OldRetAddr, 3398 SDValue OldFP, 3399 int SPDiff, 3400 bool isPPC64, 3401 bool isDarwinABI, 3402 SDLoc dl) { 3403 if (SPDiff) { 3404 // Calculate the new stack slot for the return address. 3405 int SlotSize = isPPC64 ? 8 : 4; 3406 int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, 3407 isDarwinABI); 3408 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 3409 NewRetAddrLoc, true); 3410 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 3411 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 3412 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 3413 MachinePointerInfo::getFixedStack(NewRetAddr), 3414 false, false, 0); 3415 3416 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 3417 // slot as the FP is never overwritten. 3418 if (isDarwinABI) { 3419 int NewFPLoc = 3420 SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); 3421 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 3422 true); 3423 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 3424 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 3425 MachinePointerInfo::getFixedStack(NewFPIdx), 3426 false, false, 0); 3427 } 3428 } 3429 return Chain; 3430 } 3431 3432 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 3433 /// the position of the argument. 3434 static void 3435 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 3436 SDValue Arg, int SPDiff, unsigned ArgOffset, 3437 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 3438 int Offset = ArgOffset + SPDiff; 3439 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 3440 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 3441 EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; 3442 SDValue FIN = DAG.getFrameIndex(FI, VT); 3443 TailCallArgumentInfo Info; 3444 Info.Arg = Arg; 3445 Info.FrameIdxOp = FIN; 3446 Info.FrameIdx = FI; 3447 TailCallArguments.push_back(Info); 3448 } 3449 3450 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 3451 /// stack slot. Returns the chain as result and the loaded frame pointers in 3452 /// LROpOut/FPOpout. Used when tail calling. 3453 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 3454 int SPDiff, 3455 SDValue Chain, 3456 SDValue &LROpOut, 3457 SDValue &FPOpOut, 3458 bool isDarwinABI, 3459 SDLoc dl) const { 3460 if (SPDiff) { 3461 // Load the LR and FP stack slot for later adjusting. 3462 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 3463 LROpOut = getReturnAddrFrameIndex(DAG); 3464 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 3465 false, false, false, 0); 3466 Chain = SDValue(LROpOut.getNode(), 1); 3467 3468 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 3469 // slot as the FP is never overwritten. 3470 if (isDarwinABI) { 3471 FPOpOut = getFramePointerFrameIndex(DAG); 3472 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 3473 false, false, false, 0); 3474 Chain = SDValue(FPOpOut.getNode(), 1); 3475 } 3476 } 3477 return Chain; 3478 } 3479 3480 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 3481 /// by "Src" to address "Dst" of size "Size". Alignment information is 3482 /// specified by the specific parameter attribute. The copy will be passed as 3483 /// a byval function parameter. 3484 /// Sometimes what we are copying is the end of a larger object, the part that 3485 /// does not fit in registers. 
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          SDLoc dl) {
  // Size and alignment of the copy both come from the byval argument flags.
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
///
/// For a regular call a store of Arg to PtrOff is appended to MemOpChains; for
/// vector arguments PtrOff is first recomputed as stack pointer + ArgOffset.
/// For a tail call nothing is stored here; the argument and its destination
/// slot are recorded in TailCallArguments instead.
static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                 SDValue Arg, SDValue PtrOff, int SPDiff,
                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
                 bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
                 SDLoc dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  if (!isTailCall) {
    if (isVector) {
      // Vector arguments ignore the incoming PtrOff and are addressed
      // directly off the stack pointer register.
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, PtrVT));
    }
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0));
  // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}

/// PrepareTailCall - Emit the nodes that must precede a tail call: the stores
/// of the recorded tail call arguments, the relocation of the return address
/// (and, for Darwin, the frame pointer) and the closing CALLSEQ_END. Chain and
/// InFlag are updated in place; InFlag is reset first so that preceding nodes
/// are not glued to what is emitted here.
static
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                     SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
                     SDValue LROp, SDValue FPOp, bool isDarwinABI,
                     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  MachineFunction &MF = DAG.getMachineFunction();

  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
                                        isPPC64, isDarwinABI, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}

/// PrepareCall - Classify the callee and build the operand list (Ops) and
/// result type list (NodeTys) for the call node.
///
/// The callee is rewritten in place: an absolute address accepted by
/// isBLACompatibleAddress, a global address or an external symbol becomes a
/// direct call target; anything else is treated as an indirect call, for which
/// an MTCTR node is emitted, Callee is cleared and the returned opcode is
/// PPCISD::BCTRL. Chain and InFlag are updated to follow any nodes emitted
/// here.
///
/// \returns the opcode to use for the call node: PPCISD::CALL,
/// PPCISD::CALL_TLS or PPCISD::BCTRL.
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
                     SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
                     SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
                     SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
                     const PPCSubtarget &Subtarget) {

  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  // Assume an indirect call until one of the direct forms below matches.
  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
    // Use indirect calls for ALL functions calls in JIT mode, since the
    // far-call stubs may be outside relocation limits for a BL instruction.
    if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
      unsigned OpFlags = 0;
      if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
           (Subtarget.getTargetTriple().isMacOSX() &&
            Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
           (G->getGlobal()->isDeclaration() ||
            G->getGlobal()->isWeakForLinker())) ||
          (Subtarget.isTargetELF() && !isPPC64 &&
           !G->getGlobal()->hasLocalLinkage() &&
           DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
        // PC-relative references to external symbols should go through $stub,
        // unless we're building with the leopard linker or later, which
        // automatically synthesizes these stubs.
        OpFlags = PPCII::MO_PLT_OR_STUB;
      }

      // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
      // every direct call is) turn it into a TargetGlobalAddress /
      // TargetExternalSymbol node so that legalize doesn't hack it.
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                          Callee.getValueType(),
                                          0, OpFlags);
      needIndirectCall = false;
    }
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
         (Subtarget.getTargetTriple().isMacOSX() &&
          Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
        (Subtarget.isTargetELF() && !isPPC64 &&
         DAG.getTarget().getRelocationModel() == Reloc::PIC_)   ) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = PPCII::MO_PLT_OR_STUB;
    }

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (needIndirectCall) {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // All those operations are flagged together to ensure that no other
      // operations can be scheduled in between. E.g. without flagging the
      // operations together, a TOC access in the caller could be scheduled
      // between the load of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.

      // Load the address of the function entry point from the function
      // descriptor.
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
      SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
                              makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
      Chain = LoadFuncPtr.getValue(1);
      InFlag = LoadFuncPtr.getValue(2);

      // Load environment pointer into r11.
      // Offset of the environment pointer within the function descriptor.
      SDValue PtrOff = DAG.getIntPtrConstant(16);

      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
                                       InFlag);
      Chain = LoadEnvPtr.getValue(1);
      InFlag = LoadEnvPtr.getValue(2);

      SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                        InFlag);
      Chain = EnvVal.getValue(0);
      InFlag = EnvVal.getValue(1);

      // Load TOC of the callee into r2. We are using a target-specific load
      // with r2 hard coded, because the result of a target-independent load
      // would never go directly into r2, since r2 is a reserved register (which
      // prevents the register allocator from allocating it), resulting in an
      // additional register being allocated and an unnecessary move instruction
      // being generated.
      VTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue TOCOff = DAG.getIntPtrConstant(8);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
                                       AddTOC, InFlag);
      Chain = LoadTOCPtr.getValue(0);
      InFlag = LoadTOCPtr.getValue(1);

      // The MTCTR below must consume the loaded entry point, not the
      // descriptor address, and follow the chain/glue of the loads above.
      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    // The glue operand is only passed along when one is present.
    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    // A cleared callee marks the call as indirect for the code below.
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer)
    if (isSVR4ABI && isPPC64 && !isELFv2ABI)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);

    // If this is a call to __tls_get_addr, find the symbol whose address
    // is to be taken and add it to the list.  This will be used to
    // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
    // We find the symbol by walking the chain to the CopyFromReg, walking
    // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
    // pulling the symbol from that node.
    if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
      if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
        assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
        SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
        SDValue TGTAddr = AddI->getOperand(1);
        assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
               "Didn't find target global TLS address where we expected one");
        Ops.push_back(TGTAddr);
        CallOpc = PPCISD::CALL_TLS;
      }
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Direct calls in the ELFv2 ABI need the TOC register live into the call.
  if (Callee.getNode() && isELFv2ABI)
    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));

  return CallOpc;
}

/// isLocalCall - Return true if Callee is a global address whose global is
/// neither a declaration nor weak for the linker. Any other kind of callee
/// yields false.
static
bool isLocalCall(const SDValue &Callee)
{
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    return !G->getGlobal()->isDeclaration() &&
           !G->getGlobal()->isWeakForLinker();
  return false;
}

/// LowerCallResult - Copy the values returned by a call out of the physical
/// registers assigned by RetCC_PPC and append them to InVals. Values that were
/// extended to their location type are truncated back to their value type.
/// Returns the chain that follows the last copy.
SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   SDLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Each copy is chained and glued to the previous one.
    SDValue Val = DAG.getCopyFromReg(Chain, dl,
                                     VA.getLocReg(), VA.getLocVT(), InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      // Record the zero extension before truncating to the value type.
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      // Record the sign extension before truncating to the value type.
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// FinishCall - Emit the call node itself and everything that follows it.
///
/// PrepareCall() supplies the call opcode and operand list. A tail call is
/// emitted as a PPCISD::TC_RETURN node and returned immediately. Otherwise the
/// call node is created (switching to the *_NOP variants or restoring the TOC
/// on 64-bit SVR4 as described below), followed by CALLSEQ_END and the copies
/// of the call results into InVals.
SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                              bool isTailCall, bool isVarArg,
                              SelectionDAG &DAG,
                              SmallVector<std::pair<unsigned, SDValue>, 8>
                                &RegsToPass,
                              SDValue InFlag, SDValue Chain,
                              SDValue &Callee,
                              int SPDiff, unsigned NumBytes,
                              const SmallVectorImpl<ISD::InputArg> &Ins,
                              SmallVectorImpl<SDValue> &InVals) const {

  bool isELFv2ABI = Subtarget.isELFv2ABI();
  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
                                 isTailCall, RegsToPass, Ops, NodeTys,
                                 Subtarget);

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CallConv == CallingConv::Fast &&
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
    "Expecting an global address, external symbol, absolute value or register");

    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
  }

  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged.

  bool needsTOCRestore = false;
  if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) {
    if (CallOpc == PPCISD::BCTRL) {
      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      needsTOCRestore = true;
    } else if ((CallOpc == PPCISD::CALL) &&
               (!isLocalCall(Callee) ||
                DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
    } else if (CallOpc == PPCISD::CALL_TLS)
      // For 64-bit SVR4, TLS calls are always non-local.
      CallOpc = PPCISD::CALL_NOP_TLS;
  }

  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  if (needsTOCRestore) {
    // Reload the TOC from its save slot relative to the stack pointer, glued
    // to the call node.
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
    unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
    SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
    SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
    Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(BytesCalleePops, true),
                             InFlag, dl);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}

/// LowerCall - Entry point for call lowering. Clears CLI.IsTailCall when the
/// call is not eligible for tail call optimization, reports a fatal error if
/// a musttail call site cannot be tail called, and then dispatches to the
/// 64-bit SVR4, 32-bit SVR4 or Darwin lowering routine.
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG                     = CLI.DAG;
  SDLoc &dl                             = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  // Bound by reference: the decision made here is written back into CLI.
  bool &isTailCall                      = CLI.IsTailCall;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool isVarArg                         = CLI.IsVarArg;

  if (isTailCall)
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                   Ins, DAG);

  if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, Outs, OutVals, Ins,
                              dl, DAG, InVals);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, Outs, OutVals, Ins,
                              dl, DAG, InVals);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, Outs, OutVals, Ins,
                          dl, DAG, InVals);
}

/// LowerCall_32SVR4 - Lower an outgoing call for the 32-bit SVR4 ABI.
///
/// Argument locations are assigned with CC_PPC32_SVR4 (or, for the variable
/// part of a vararg call, CC_PPC32_SVR4_VarArg); byval aggregates get an
/// additional location from CC_PPC32_SVR4_ByVal for their copy. The routine
/// then emits CALLSEQ_START, the byval copies, the argument stores and
/// register copies, the CR6 set/unset node for vararg calls, and finally hands
/// over to PrepareTailCall()/FinishCall().
SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
                       PtrByteSize);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      // A true result means the argument could not be assigned a location.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
             << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
                                       dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // 'i' indexes ArgLocs/OutVals/Outs, 'j' indexes ByValArgLocs.
  for (unsigned i = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame. The copy must be emitted outside the
      // CALLSEQ_START..END pair, which the shared helper takes care of by
      // replacing the call sequence start node; continue on the new node.
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    if (VA.isRegLoc()) {
      if (Arg.getValueType() == MVT::i1)
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);

      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);

        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (isVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    // The glue operand is only passed along when one is present.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
                    false, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                    Ins, InVals);
}

// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
//
// The memcpy is chained to the operand that fed the existing CALLSEQ_START.
// A new CALLSEQ_START chained to the memcpy then replaces all uses of the old
// one and is returned, so callers must continue with the returned node.
SDValue
PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                              SDValue CallSeqStart,
                                              ISD::ArgFlagsTy Flags,
                                              SelectionDAG &DAG,
                                              SDLoc dl) const {
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                        CallSeqStart.getNode()->getOperand(0),
                        Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                             CallSeqStart.getNode()->getOperand(1),
                             SDLoc(MemcpyCall));
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}

SDValue
PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {

  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call.
As a consequence the frame pointer will be used for dynamicalloc 4214 // and restoring the callers stack pointer in this functions epilog. This is 4215 // done because by tail calling the called function might overwrite the value 4216 // in this function's (MF) stack pointer stack slot 0(SP). 4217 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4218 CallConv == CallingConv::Fast) 4219 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4220 4221 // Count how many bytes are to be pushed on the stack, including the linkage 4222 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes 4223 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage 4224 // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 4225 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, 4226 isELFv2ABI); 4227 unsigned NumBytes = LinkageSize; 4228 4229 // Add up all the space actually used. 4230 for (unsigned i = 0; i != NumOps; ++i) { 4231 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4232 EVT ArgVT = Outs[i].VT; 4233 EVT OrigVT = Outs[i].ArgVT; 4234 4235 /* Respect alignment of argument on the stack. */ 4236 unsigned Align = 4237 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 4238 NumBytes = ((NumBytes + Align - 1) / Align) * Align; 4239 4240 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 4241 if (Flags.isInConsecutiveRegsLast()) 4242 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4243 } 4244 4245 unsigned NumBytesActuallyUsed = NumBytes; 4246 4247 // The prolog code of the callee may store up to 8 GPR argument registers to 4248 // the stack, allowing va_start to index over them in memory if its varargs. 4249 // Because we cannot tell if this is needed on the caller side, we have to 4250 // conservatively assume that it is needed. As such, make sure we have at 4251 // least enough stack space for the caller to store the 8 GPRs. 
4252 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. 4253 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 4254 4255 // Tail call needs the stack to be aligned. 4256 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4257 CallConv == CallingConv::Fast) 4258 NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); 4259 4260 // Calculate by how many bytes the stack has to be adjusted in case of tail 4261 // call optimization. 4262 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4263 4264 // To protect arguments on the stack from being clobbered in a tail call, 4265 // force all the loads to happen before doing any other lowering. 4266 if (isTailCall) 4267 Chain = DAG.getStackArgumentTokenFactor(Chain); 4268 4269 // Adjust the stack pointer for the new arguments... 4270 // These operations are automatically eliminated by the prolog/epilog pass 4271 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4272 dl); 4273 SDValue CallSeqStart = Chain; 4274 4275 // Load the return address and frame pointer so it can be move somewhere else 4276 // later. 4277 SDValue LROp, FPOp; 4278 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4279 dl); 4280 4281 // Set up a copy of the stack pointer for use loading and storing any 4282 // arguments that may not fit in the registers available for argument 4283 // passing. 4284 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4285 4286 // Figure out which arguments are going to go in registers, and which in 4287 // memory. Also, if this is a vararg function, floating point operations 4288 // must be stored to our stack, and loaded into integer regs as well, if 4289 // any integer regs are available for argument passing. 
4290 unsigned ArgOffset = LinkageSize; 4291 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; 4292 4293 static const MCPhysReg GPR[] = { 4294 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4295 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4296 }; 4297 static const MCPhysReg *FPR = GetFPR(); 4298 4299 static const MCPhysReg VR[] = { 4300 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4301 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4302 }; 4303 static const MCPhysReg VSRH[] = { 4304 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 4305 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 4306 }; 4307 4308 const unsigned NumGPRs = array_lengthof(GPR); 4309 const unsigned NumFPRs = 13; 4310 const unsigned NumVRs = array_lengthof(VR); 4311 4312 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4313 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4314 4315 SmallVector<SDValue, 8> MemOpChains; 4316 for (unsigned i = 0; i != NumOps; ++i) { 4317 SDValue Arg = OutVals[i]; 4318 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4319 EVT ArgVT = Outs[i].VT; 4320 EVT OrigVT = Outs[i].ArgVT; 4321 4322 /* Respect alignment of argument on the stack. */ 4323 unsigned Align = 4324 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 4325 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 4326 4327 /* Compute GPR index associated with argument offset. */ 4328 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 4329 GPR_idx = std::min(GPR_idx, NumGPRs); 4330 4331 // PtrOff will be used to store the current argument to the stack if a 4332 // register cannot be found for it. 4333 SDValue PtrOff; 4334 4335 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4336 4337 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4338 4339 // Promote integers to 64-bit values. 4340 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 4341 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 
4342 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4343 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4344 } 4345 4346 // FIXME memcpy is used way more than necessary. Correctness first. 4347 // Note: "by value" is code for passing a structure by value, not 4348 // basic types. 4349 if (Flags.isByVal()) { 4350 // Note: Size includes alignment padding, so 4351 // struct x { short a; char b; } 4352 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 4353 // These are the proper values we need for right-justifying the 4354 // aggregate in a parameter register. 4355 unsigned Size = Flags.getByValSize(); 4356 4357 // An empty aggregate parameter takes up no storage and no 4358 // registers. 4359 if (Size == 0) 4360 continue; 4361 4362 // All aggregates smaller than 8 bytes must be passed right-justified. 4363 if (Size==1 || Size==2 || Size==4) { 4364 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); 4365 if (GPR_idx != NumGPRs) { 4366 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4367 MachinePointerInfo(), VT, 4368 false, false, 0); 4369 MemOpChains.push_back(Load.getValue(1)); 4370 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); 4371 4372 ArgOffset += PtrByteSize; 4373 continue; 4374 } 4375 } 4376 4377 if (GPR_idx == NumGPRs && Size < 8) { 4378 SDValue AddPtr = PtrOff; 4379 if (!isLittleEndian) { 4380 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4381 PtrOff.getValueType()); 4382 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4383 } 4384 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4385 CallSeqStart, 4386 Flags, DAG, dl); 4387 ArgOffset += PtrByteSize; 4388 continue; 4389 } 4390 // Copy entire object into memory. There are cases where gcc-generated 4391 // code assumes it is there, even if it could be put entirely into 4392 // registers. (This is not what the doc says.) 
4393 4394 // FIXME: The above statement is likely due to a misunderstanding of the 4395 // documents. All arguments must be copied into the parameter area BY 4396 // THE CALLEE in the event that the callee takes the address of any 4397 // formal argument. That has not yet been implemented. However, it is 4398 // reasonable to use the stack area as a staging area for the register 4399 // load. 4400 4401 // Skip this for small aggregates, as we will use the same slot for a 4402 // right-justified copy, below. 4403 if (Size >= 8) 4404 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4405 CallSeqStart, 4406 Flags, DAG, dl); 4407 4408 // When a register is available, pass a small aggregate right-justified. 4409 if (Size < 8 && GPR_idx != NumGPRs) { 4410 // The easiest way to get this right-justified in a register 4411 // is to copy the structure into the rightmost portion of a 4412 // local variable slot, then load the whole slot into the 4413 // register. 4414 // FIXME: The memcpy seems to produce pretty awful code for 4415 // small aggregates, particularly for packed ones. 4416 // FIXME: It would be preferable to use the slot in the 4417 // parameter save area instead of a new local variable. 4418 SDValue AddPtr = PtrOff; 4419 if (!isLittleEndian) { 4420 SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); 4421 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4422 } 4423 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4424 CallSeqStart, 4425 Flags, DAG, dl); 4426 4427 // Load the slot into the register. 4428 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, 4429 MachinePointerInfo(), 4430 false, false, false, 0); 4431 MemOpChains.push_back(Load.getValue(1)); 4432 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); 4433 4434 // Done with this argument. 
4435 ArgOffset += PtrByteSize; 4436 continue; 4437 } 4438 4439 // For aggregates larger than PtrByteSize, copy the pieces of the 4440 // object that fit into registers from the parameter save area. 4441 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4442 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4443 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4444 if (GPR_idx != NumGPRs) { 4445 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4446 MachinePointerInfo(), 4447 false, false, false, 0); 4448 MemOpChains.push_back(Load.getValue(1)); 4449 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4450 ArgOffset += PtrByteSize; 4451 } else { 4452 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4453 break; 4454 } 4455 } 4456 continue; 4457 } 4458 4459 switch (Arg.getSimpleValueType().SimpleTy) { 4460 default: llvm_unreachable("Unexpected ValueType for argument!"); 4461 case MVT::i1: 4462 case MVT::i32: 4463 case MVT::i64: 4464 // These can be scalar arguments or elements of an integer array type 4465 // passed directly. Clang may use those instead of "byval" aggregate 4466 // types to avoid forcing arguments to memory unnecessarily. 4467 if (GPR_idx != NumGPRs) { 4468 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg)); 4469 } else { 4470 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4471 true, isTailCall, false, MemOpChains, 4472 TailCallArguments, dl); 4473 } 4474 ArgOffset += PtrByteSize; 4475 break; 4476 case MVT::f32: 4477 case MVT::f64: { 4478 // These can be scalar arguments or elements of a float array type 4479 // passed directly. The latter are used to implement ELFv2 homogenous 4480 // float aggregates. 4481 4482 // Named arguments go into FPRs first, and once they overflow, the 4483 // remaining arguments go into GPRs and then the parameter save area. 4484 // Unnamed arguments for vararg functions always go to GPRs and 4485 // then the parameter save area. 
For now, put all arguments to vararg 4486 // routines always in both locations (FPR *and* GPR or stack slot). 4487 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 4488 4489 // First load the argument into the next available FPR. 4490 if (FPR_idx != NumFPRs) 4491 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4492 4493 // Next, load the argument into GPR or stack slot if needed. 4494 if (!NeedGPROrStack) 4495 ; 4496 else if (GPR_idx != NumGPRs) { 4497 // In the non-vararg case, this can only ever happen in the 4498 // presence of f32 array types, since otherwise we never run 4499 // out of FPRs before running out of GPRs. 4500 SDValue ArgVal; 4501 4502 // Double values are always passed in a single GPR. 4503 if (Arg.getValueType() != MVT::f32) { 4504 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 4505 4506 // Non-array float values are extended and passed in a GPR. 4507 } else if (!Flags.isInConsecutiveRegs()) { 4508 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4509 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 4510 4511 // If we have an array of floats, we collect every odd element 4512 // together with its predecessor into one GPR. 4513 } else if (ArgOffset % PtrByteSize != 0) { 4514 SDValue Lo, Hi; 4515 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 4516 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4517 if (!isLittleEndian) 4518 std::swap(Lo, Hi); 4519 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 4520 4521 // The final element, if even, goes into the first half of a GPR. 
4522 } else if (Flags.isInConsecutiveRegsLast()) { 4523 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4524 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 4525 if (!isLittleEndian) 4526 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, 4527 DAG.getConstant(32, MVT::i32)); 4528 4529 // Non-final even elements are skipped; they will be handled 4530 // together the with subsequent argument on the next go-around. 4531 } else 4532 ArgVal = SDValue(); 4533 4534 if (ArgVal.getNode()) 4535 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal)); 4536 } else { 4537 // Single-precision floating-point values are mapped to the 4538 // second (rightmost) word of the stack doubleword. 4539 if (Arg.getValueType() == MVT::f32 && 4540 !isLittleEndian && !Flags.isInConsecutiveRegs()) { 4541 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4542 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4543 } 4544 4545 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4546 true, isTailCall, false, MemOpChains, 4547 TailCallArguments, dl); 4548 } 4549 // When passing an array of floats, the array occupies consecutive 4550 // space in the argument area; only round up to the next doubleword 4551 // at the end of the array. Otherwise, each float takes 8 bytes. 4552 ArgOffset += (Arg.getValueType() == MVT::f32 && 4553 Flags.isInConsecutiveRegs()) ? 4 : 8; 4554 if (Flags.isInConsecutiveRegsLast()) 4555 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 4556 break; 4557 } 4558 case MVT::v4f32: 4559 case MVT::v4i32: 4560 case MVT::v8i16: 4561 case MVT::v16i8: 4562 case MVT::v2f64: 4563 case MVT::v2i64: 4564 // These can be scalar arguments or elements of a vector array type 4565 // passed directly. The latter are used to implement ELFv2 homogenous 4566 // vector aggregates. 
4567 4568 // For a varargs call, named arguments go into VRs or on the stack as 4569 // usual; unnamed arguments always go to the stack or the corresponding 4570 // GPRs when within range. For now, we always put the value in both 4571 // locations (or even all three). 4572 if (isVarArg) { 4573 // We could elide this store in the case where the object fits 4574 // entirely in R registers. Maybe later. 4575 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4576 MachinePointerInfo(), false, false, 0); 4577 MemOpChains.push_back(Store); 4578 if (VR_idx != NumVRs) { 4579 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4580 MachinePointerInfo(), 4581 false, false, false, 0); 4582 MemOpChains.push_back(Load.getValue(1)); 4583 4584 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 4585 Arg.getSimpleValueType() == MVT::v2i64) ? 4586 VSRH[VR_idx] : VR[VR_idx]; 4587 ++VR_idx; 4588 4589 RegsToPass.push_back(std::make_pair(VReg, Load)); 4590 } 4591 ArgOffset += 16; 4592 for (unsigned i=0; i<16; i+=PtrByteSize) { 4593 if (GPR_idx == NumGPRs) 4594 break; 4595 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4596 DAG.getConstant(i, PtrVT)); 4597 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4598 false, false, false, 0); 4599 MemOpChains.push_back(Load.getValue(1)); 4600 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4601 } 4602 break; 4603 } 4604 4605 // Non-varargs Altivec params go into VRs or on the stack. 4606 if (VR_idx != NumVRs) { 4607 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || 4608 Arg.getSimpleValueType() == MVT::v2i64) ? 
4609 VSRH[VR_idx] : VR[VR_idx]; 4610 ++VR_idx; 4611 4612 RegsToPass.push_back(std::make_pair(VReg, Arg)); 4613 } else { 4614 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4615 true, isTailCall, true, MemOpChains, 4616 TailCallArguments, dl); 4617 } 4618 ArgOffset += 16; 4619 break; 4620 } 4621 } 4622 4623 assert(NumBytesActuallyUsed == ArgOffset); 4624 (void)NumBytesActuallyUsed; 4625 4626 if (!MemOpChains.empty()) 4627 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4628 4629 // Check if this is an indirect call (MTCTR/BCTRL). 4630 // See PrepareCall() for more information about calls through function 4631 // pointers in the 64-bit SVR4 ABI. 4632 if (!isTailCall && 4633 !dyn_cast<GlobalAddressSDNode>(Callee) && 4634 !dyn_cast<ExternalSymbolSDNode>(Callee)) { 4635 // Load r2 into a virtual register and store it to the TOC save area. 4636 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 4637 // TOC save area offset. 4638 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); 4639 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); 4640 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4641 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), 4642 false, false, 0); 4643 // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 4644 // This does not mean the MTCTR instruction must use R12; it's easier 4645 // to model this as an extra parameter, so do that. 4646 if (isELFv2ABI) 4647 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 4648 } 4649 4650 // Build a sequence of copy-to-reg nodes chained together with token chain 4651 // and flag operands which copy the outgoing args into the appropriate regs. 
4652 SDValue InFlag; 4653 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4654 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4655 RegsToPass[i].second, InFlag); 4656 InFlag = Chain.getValue(1); 4657 } 4658 4659 if (isTailCall) 4660 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, 4661 FPOp, true, TailCallArguments); 4662 4663 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 4664 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4665 Ins, InVals); 4666 } 4667 4668 SDValue 4669 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, 4670 CallingConv::ID CallConv, bool isVarArg, 4671 bool isTailCall, 4672 const SmallVectorImpl<ISD::OutputArg> &Outs, 4673 const SmallVectorImpl<SDValue> &OutVals, 4674 const SmallVectorImpl<ISD::InputArg> &Ins, 4675 SDLoc dl, SelectionDAG &DAG, 4676 SmallVectorImpl<SDValue> &InVals) const { 4677 4678 unsigned NumOps = Outs.size(); 4679 4680 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4681 bool isPPC64 = PtrVT == MVT::i64; 4682 unsigned PtrByteSize = isPPC64 ? 8 : 4; 4683 4684 MachineFunction &MF = DAG.getMachineFunction(); 4685 4686 // Mark this function as potentially containing a function that contains a 4687 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4688 // and restoring the callers stack pointer in this functions epilog. This is 4689 // done because by tail calling the called function might overwrite the value 4690 // in this function's (MF) stack pointer stack slot 0(SP). 4691 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4692 CallConv == CallingConv::Fast) 4693 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4694 4695 // Count how many bytes are to be pushed on the stack, including the linkage 4696 // area, and parameter passing area. We start with 24/48 bytes, which is 4697 // prereserved space for [SP][CR][LR][3 x unused]. 
4698 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true, 4699 false); 4700 unsigned NumBytes = LinkageSize; 4701 4702 // Add up all the space actually used. 4703 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 4704 // they all go in registers, but we must reserve stack space for them for 4705 // possible use by the caller. In varargs or 64-bit calls, parameters are 4706 // assigned stack space in order, with padding so Altivec parameters are 4707 // 16-byte aligned. 4708 unsigned nAltivecParamsAtEnd = 0; 4709 for (unsigned i = 0; i != NumOps; ++i) { 4710 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4711 EVT ArgVT = Outs[i].VT; 4712 // Varargs Altivec parameters are padded to a 16 byte boundary. 4713 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || 4714 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || 4715 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { 4716 if (!isVarArg && !isPPC64) { 4717 // Non-varargs Altivec parameters go after all the non-Altivec 4718 // parameters; handle those later so we know how much padding we need. 4719 nAltivecParamsAtEnd++; 4720 continue; 4721 } 4722 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 4723 NumBytes = ((NumBytes+15)/16)*16; 4724 } 4725 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 4726 } 4727 4728 // Allow for Altivec parameters at the end, if needed. 4729 if (nAltivecParamsAtEnd) { 4730 NumBytes = ((NumBytes+15)/16)*16; 4731 NumBytes += 16*nAltivecParamsAtEnd; 4732 } 4733 4734 // The prolog code of the callee may store up to 8 GPR argument registers to 4735 // the stack, allowing va_start to index over them in memory if its varargs. 4736 // Because we cannot tell if this is needed on the caller side, we have to 4737 // conservatively assume that it is needed. As such, make sure we have at 4738 // least enough stack space for the caller to store the 8 GPRs. 
4739 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); 4740 4741 // Tail call needs the stack to be aligned. 4742 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4743 CallConv == CallingConv::Fast) 4744 NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); 4745 4746 // Calculate by how many bytes the stack has to be adjusted in case of tail 4747 // call optimization. 4748 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4749 4750 // To protect arguments on the stack from being clobbered in a tail call, 4751 // force all the loads to happen before doing any other lowering. 4752 if (isTailCall) 4753 Chain = DAG.getStackArgumentTokenFactor(Chain); 4754 4755 // Adjust the stack pointer for the new arguments... 4756 // These operations are automatically eliminated by the prolog/epilog pass 4757 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4758 dl); 4759 SDValue CallSeqStart = Chain; 4760 4761 // Load the return address and frame pointer so it can be move somewhere else 4762 // later. 4763 SDValue LROp, FPOp; 4764 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4765 dl); 4766 4767 // Set up a copy of the stack pointer for use loading and storing any 4768 // arguments that may not fit in the registers available for argument 4769 // passing. 4770 SDValue StackPtr; 4771 if (isPPC64) 4772 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4773 else 4774 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4775 4776 // Figure out which arguments are going to go in registers, and which in 4777 // memory. Also, if this is a vararg function, floating point operations 4778 // must be stored to our stack, and loaded into integer regs as well, if 4779 // any integer regs are available for argument passing. 4780 unsigned ArgOffset = LinkageSize; 4781 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4782 4783 static const MCPhysReg GPR_32[] = { // 32-bit registers. 
4784 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 4785 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 4786 }; 4787 static const MCPhysReg GPR_64[] = { // 64-bit registers. 4788 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4789 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4790 }; 4791 static const MCPhysReg *FPR = GetFPR(); 4792 4793 static const MCPhysReg VR[] = { 4794 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4795 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4796 }; 4797 const unsigned NumGPRs = array_lengthof(GPR_32); 4798 const unsigned NumFPRs = 13; 4799 const unsigned NumVRs = array_lengthof(VR); 4800 4801 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 4802 4803 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4804 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4805 4806 SmallVector<SDValue, 8> MemOpChains; 4807 for (unsigned i = 0; i != NumOps; ++i) { 4808 SDValue Arg = OutVals[i]; 4809 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4810 4811 // PtrOff will be used to store the current argument to the stack if a 4812 // register cannot be found for it. 4813 SDValue PtrOff; 4814 4815 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4816 4817 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4818 4819 // On PPC64, promote integers to 64-bit values. 4820 if (isPPC64 && Arg.getValueType() == MVT::i32) { 4821 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4822 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4823 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4824 } 4825 4826 // FIXME memcpy is used way more than necessary. Correctness first. 4827 // Note: "by value" is code for passing a structure by value, not 4828 // basic types. 4829 if (Flags.isByVal()) { 4830 unsigned Size = Flags.getByValSize(); 4831 // Very small objects are passed right-justified. Everything else is 4832 // passed left-justified. 4833 if (Size==1 || Size==2) { 4834 EVT VT = (Size==1) ? 
MVT::i8 : MVT::i16; 4835 if (GPR_idx != NumGPRs) { 4836 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4837 MachinePointerInfo(), VT, 4838 false, false, 0); 4839 MemOpChains.push_back(Load.getValue(1)); 4840 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4841 4842 ArgOffset += PtrByteSize; 4843 } else { 4844 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4845 PtrOff.getValueType()); 4846 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4847 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4848 CallSeqStart, 4849 Flags, DAG, dl); 4850 ArgOffset += PtrByteSize; 4851 } 4852 continue; 4853 } 4854 // Copy entire object into memory. There are cases where gcc-generated 4855 // code assumes it is there, even if it could be put entirely into 4856 // registers. (This is not what the doc says.) 4857 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4858 CallSeqStart, 4859 Flags, DAG, dl); 4860 4861 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 4862 // copy the pieces of the object that fit into registers from the 4863 // parameter save area. 
4864 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4865 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4866 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4867 if (GPR_idx != NumGPRs) { 4868 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4869 MachinePointerInfo(), 4870 false, false, false, 0); 4871 MemOpChains.push_back(Load.getValue(1)); 4872 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4873 ArgOffset += PtrByteSize; 4874 } else { 4875 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4876 break; 4877 } 4878 } 4879 continue; 4880 } 4881 4882 switch (Arg.getSimpleValueType().SimpleTy) { 4883 default: llvm_unreachable("Unexpected ValueType for argument!"); 4884 case MVT::i1: 4885 case MVT::i32: 4886 case MVT::i64: 4887 if (GPR_idx != NumGPRs) { 4888 if (Arg.getValueType() == MVT::i1) 4889 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 4890 4891 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4892 } else { 4893 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4894 isPPC64, isTailCall, false, MemOpChains, 4895 TailCallArguments, dl); 4896 } 4897 ArgOffset += PtrByteSize; 4898 break; 4899 case MVT::f32: 4900 case MVT::f64: 4901 if (FPR_idx != NumFPRs) { 4902 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4903 4904 if (isVarArg) { 4905 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4906 MachinePointerInfo(), false, false, 0); 4907 MemOpChains.push_back(Store); 4908 4909 // Float varargs are always shadowed in available integer registers 4910 if (GPR_idx != NumGPRs) { 4911 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4912 MachinePointerInfo(), false, false, 4913 false, 0); 4914 MemOpChains.push_back(Load.getValue(1)); 4915 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4916 } 4917 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 4918 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4919 PtrOff = 
DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4920 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4921 MachinePointerInfo(), 4922 false, false, false, 0); 4923 MemOpChains.push_back(Load.getValue(1)); 4924 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4925 } 4926 } else { 4927 // If we have any FPRs remaining, we may also have GPRs remaining. 4928 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 4929 // GPRs. 4930 if (GPR_idx != NumGPRs) 4931 ++GPR_idx; 4932 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 4933 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 4934 ++GPR_idx; 4935 } 4936 } else 4937 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4938 isPPC64, isTailCall, false, MemOpChains, 4939 TailCallArguments, dl); 4940 if (isPPC64) 4941 ArgOffset += 8; 4942 else 4943 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 4944 break; 4945 case MVT::v4f32: 4946 case MVT::v4i32: 4947 case MVT::v8i16: 4948 case MVT::v16i8: 4949 if (isVarArg) { 4950 // These go aligned on the stack, or in the corresponding R registers 4951 // when within range. The Darwin PPC ABI doc claims they also go in 4952 // V registers; in fact gcc does this only for arguments that are 4953 // prototyped, not for those that match the ... We do it for all 4954 // arguments, seems to work. 4955 while (ArgOffset % 16 !=0) { 4956 ArgOffset += PtrByteSize; 4957 if (GPR_idx != NumGPRs) 4958 GPR_idx++; 4959 } 4960 // We could elide this store in the case where the object fits 4961 // entirely in R registers. Maybe later. 
4962 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4963 DAG.getConstant(ArgOffset, PtrVT)); 4964 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4965 MachinePointerInfo(), false, false, 0); 4966 MemOpChains.push_back(Store); 4967 if (VR_idx != NumVRs) { 4968 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4969 MachinePointerInfo(), 4970 false, false, false, 0); 4971 MemOpChains.push_back(Load.getValue(1)); 4972 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 4973 } 4974 ArgOffset += 16; 4975 for (unsigned i=0; i<16; i+=PtrByteSize) { 4976 if (GPR_idx == NumGPRs) 4977 break; 4978 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4979 DAG.getConstant(i, PtrVT)); 4980 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4981 false, false, false, 0); 4982 MemOpChains.push_back(Load.getValue(1)); 4983 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4984 } 4985 break; 4986 } 4987 4988 // Non-varargs Altivec params generally go in registers, but have 4989 // stack space allocated at the end. 4990 if (VR_idx != NumVRs) { 4991 // Doesn't have GPR space allocated. 4992 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 4993 } else if (nAltivecParamsAtEnd==0) { 4994 // We are emitting Altivec params in order. 4995 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4996 isPPC64, isTailCall, true, MemOpChains, 4997 TailCallArguments, dl); 4998 ArgOffset += 16; 4999 } 5000 break; 5001 } 5002 } 5003 // If all Altivec parameters fit in registers, as they usually do, 5004 // they get stack space following the non-Altivec parameters. We 5005 // don't track this here because nobody below needs it. 5006 // If there are more Altivec parameters than fit in registers emit 5007 // the stores here. 5008 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 5009 unsigned j = 0; 5010 // Offset is aligned; skip 1st 12 params which go in V registers. 
5011 ArgOffset = ((ArgOffset+15)/16)*16; 5012 ArgOffset += 12*16; 5013 for (unsigned i = 0; i != NumOps; ++i) { 5014 SDValue Arg = OutVals[i]; 5015 EVT ArgType = Outs[i].VT; 5016 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 5017 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 5018 if (++j > NumVRs) { 5019 SDValue PtrOff; 5020 // We are emitting Altivec params in order. 5021 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5022 isPPC64, isTailCall, true, MemOpChains, 5023 TailCallArguments, dl); 5024 ArgOffset += 16; 5025 } 5026 } 5027 } 5028 } 5029 5030 if (!MemOpChains.empty()) 5031 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5032 5033 // On Darwin, R12 must contain the address of an indirect callee. This does 5034 // not mean the MTCTR instruction must use R12; it's easier to model this as 5035 // an extra parameter, so do that. 5036 if (!isTailCall && 5037 !dyn_cast<GlobalAddressSDNode>(Callee) && 5038 !dyn_cast<ExternalSymbolSDNode>(Callee) && 5039 !isBLACompatibleAddress(Callee, DAG)) 5040 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 5041 PPC::R12), Callee)); 5042 5043 // Build a sequence of copy-to-reg nodes chained together with token chain 5044 // and flag operands which copy the outgoing args into the appropriate regs. 
5045 SDValue InFlag; 5046 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5047 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5048 RegsToPass[i].second, InFlag); 5049 InFlag = Chain.getValue(1); 5050 } 5051 5052 if (isTailCall) 5053 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 5054 FPOp, true, TailCallArguments); 5055 5056 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 5057 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 5058 Ins, InVals); 5059 } 5060 5061 bool 5062 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 5063 MachineFunction &MF, bool isVarArg, 5064 const SmallVectorImpl<ISD::OutputArg> &Outs, 5065 LLVMContext &Context) const { 5066 SmallVector<CCValAssign, 16> RVLocs; 5067 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), 5068 RVLocs, Context); 5069 return CCInfo.CheckReturn(Outs, RetCC_PPC); 5070 } 5071 5072 SDValue 5073 PPCTargetLowering::LowerReturn(SDValue Chain, 5074 CallingConv::ID CallConv, bool isVarArg, 5075 const SmallVectorImpl<ISD::OutputArg> &Outs, 5076 const SmallVectorImpl<SDValue> &OutVals, 5077 SDLoc dl, SelectionDAG &DAG) const { 5078 5079 SmallVector<CCValAssign, 16> RVLocs; 5080 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 5081 getTargetMachine(), RVLocs, *DAG.getContext()); 5082 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 5083 5084 SDValue Flag; 5085 SmallVector<SDValue, 4> RetOps(1, Chain); 5086 5087 // Copy the result values into the output registers. 
5088 for (unsigned i = 0; i != RVLocs.size(); ++i) { 5089 CCValAssign &VA = RVLocs[i]; 5090 assert(VA.isRegLoc() && "Can only return in registers!"); 5091 5092 SDValue Arg = OutVals[i]; 5093 5094 switch (VA.getLocInfo()) { 5095 default: llvm_unreachable("Unknown loc info!"); 5096 case CCValAssign::Full: break; 5097 case CCValAssign::AExt: 5098 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 5099 break; 5100 case CCValAssign::ZExt: 5101 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 5102 break; 5103 case CCValAssign::SExt: 5104 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 5105 break; 5106 } 5107 5108 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 5109 Flag = Chain.getValue(1); 5110 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 5111 } 5112 5113 RetOps[0] = Chain; // Update chain. 5114 5115 // Add the flag if we have it. 5116 if (Flag.getNode()) 5117 RetOps.push_back(Flag); 5118 5119 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); 5120 } 5121 5122 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, 5123 const PPCSubtarget &Subtarget) const { 5124 // When we pop the dynamic allocation we need to restore the SP link. 5125 SDLoc dl(Op); 5126 5127 // Get the corect type for pointers. 5128 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5129 5130 // Construct the stack pointer operand. 5131 bool isPPC64 = Subtarget.isPPC64(); 5132 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 5133 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 5134 5135 // Get the operands for the STACKRESTORE. 5136 SDValue Chain = Op.getOperand(0); 5137 SDValue SaveSP = Op.getOperand(1); 5138 5139 // Load the old link SP. 5140 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, 5141 MachinePointerInfo(), 5142 false, false, false, 0); 5143 5144 // Restore the stack pointer. 
5145 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 5146 5147 // Store the old link SP. 5148 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(), 5149 false, false, 0); 5150 } 5151 5152 5153 5154 SDValue 5155 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { 5156 MachineFunction &MF = DAG.getMachineFunction(); 5157 bool isPPC64 = Subtarget.isPPC64(); 5158 bool isDarwinABI = Subtarget.isDarwinABI(); 5159 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5160 5161 // Get current frame pointer save index. The users of this index will be 5162 // primarily DYNALLOC instructions. 5163 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 5164 int RASI = FI->getReturnAddrSaveIndex(); 5165 5166 // If the frame pointer save index hasn't been defined yet. 5167 if (!RASI) { 5168 // Find out what the fix offset of the frame pointer save area. 5169 int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); 5170 // Allocate the frame index for frame pointer save area. 5171 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true); 5172 // Save the result. 5173 FI->setReturnAddrSaveIndex(RASI); 5174 } 5175 return DAG.getFrameIndex(RASI, PtrVT); 5176 } 5177 5178 SDValue 5179 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 5180 MachineFunction &MF = DAG.getMachineFunction(); 5181 bool isPPC64 = Subtarget.isPPC64(); 5182 bool isDarwinABI = Subtarget.isDarwinABI(); 5183 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5184 5185 // Get current frame pointer save index. The users of this index will be 5186 // primarily DYNALLOC instructions. 5187 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 5188 int FPSI = FI->getFramePointerSaveIndex(); 5189 5190 // If the frame pointer save index hasn't been defined yet. 5191 if (!FPSI) { 5192 // Find out what the fix offset of the frame pointer save area. 
5193 int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, 5194 isDarwinABI); 5195 5196 // Allocate the frame index for frame pointer save area. 5197 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 5198 // Save the result. 5199 FI->setFramePointerSaveIndex(FPSI); 5200 } 5201 return DAG.getFrameIndex(FPSI, PtrVT); 5202 } 5203 5204 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 5205 SelectionDAG &DAG, 5206 const PPCSubtarget &Subtarget) const { 5207 // Get the inputs. 5208 SDValue Chain = Op.getOperand(0); 5209 SDValue Size = Op.getOperand(1); 5210 SDLoc dl(Op); 5211 5212 // Get the corect type for pointers. 5213 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5214 // Negate the size. 5215 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 5216 DAG.getConstant(0, PtrVT), Size); 5217 // Construct a node for the frame pointer save index. 5218 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 5219 // Build a DYNALLOC node. 5220 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 5221 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 5222 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); 5223 } 5224 5225 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 5226 SelectionDAG &DAG) const { 5227 SDLoc DL(Op); 5228 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 5229 DAG.getVTList(MVT::i32, MVT::Other), 5230 Op.getOperand(0), Op.getOperand(1)); 5231 } 5232 5233 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 5234 SelectionDAG &DAG) const { 5235 SDLoc DL(Op); 5236 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 5237 Op.getOperand(0), Op.getOperand(1)); 5238 } 5239 5240 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 5241 assert(Op.getValueType() == MVT::i1 && 5242 "Custom lowering only for i1 loads"); 5243 5244 // First, load 8 bits into 32 bits, then truncate to 1 bit. 
5245 5246 SDLoc dl(Op); 5247 LoadSDNode *LD = cast<LoadSDNode>(Op); 5248 5249 SDValue Chain = LD->getChain(); 5250 SDValue BasePtr = LD->getBasePtr(); 5251 MachineMemOperand *MMO = LD->getMemOperand(); 5252 5253 SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, 5254 BasePtr, MVT::i8, MMO); 5255 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); 5256 5257 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; 5258 return DAG.getMergeValues(Ops, dl); 5259 } 5260 5261 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 5262 assert(Op.getOperand(1).getValueType() == MVT::i1 && 5263 "Custom lowering only for i1 stores"); 5264 5265 // First, zero extend to 32 bits, then use a truncating store to 8 bits. 5266 5267 SDLoc dl(Op); 5268 StoreSDNode *ST = cast<StoreSDNode>(Op); 5269 5270 SDValue Chain = ST->getChain(); 5271 SDValue BasePtr = ST->getBasePtr(); 5272 SDValue Value = ST->getValue(); 5273 MachineMemOperand *MMO = ST->getMemOperand(); 5274 5275 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); 5276 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); 5277 } 5278 5279 // FIXME: Remove this once the ANDI glue bug is fixed: 5280 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { 5281 assert(Op.getValueType() == MVT::i1 && 5282 "Custom lowering only for i1 results"); 5283 5284 SDLoc DL(Op); 5285 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, 5286 Op.getOperand(0)); 5287 } 5288 5289 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 5290 /// possible. 5291 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 5292 // Not FP? Not a fsel. 
5293 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 5294 !Op.getOperand(2).getValueType().isFloatingPoint()) 5295 return Op; 5296 5297 // We might be able to do better than this under some circumstances, but in 5298 // general, fsel-based lowering of select is a finite-math-only optimization. 5299 // For more information, see section F.3 of the 2.06 ISA specification. 5300 if (!DAG.getTarget().Options.NoInfsFPMath || 5301 !DAG.getTarget().Options.NoNaNsFPMath) 5302 return Op; 5303 5304 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 5305 5306 EVT ResVT = Op.getValueType(); 5307 EVT CmpVT = Op.getOperand(0).getValueType(); 5308 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5309 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 5310 SDLoc dl(Op); 5311 5312 // If the RHS of the comparison is a 0.0, we don't need to do the 5313 // subtraction at all. 5314 SDValue Sel1; 5315 if (isFloatingPointZero(RHS)) 5316 switch (CC) { 5317 default: break; // SETUO etc aren't handled by fsel. 
5318 case ISD::SETNE: 5319 std::swap(TV, FV); 5320 case ISD::SETEQ: 5321 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 5322 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 5323 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 5324 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 5325 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 5326 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5327 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 5328 case ISD::SETULT: 5329 case ISD::SETLT: 5330 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 5331 case ISD::SETOGE: 5332 case ISD::SETGE: 5333 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 5334 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 5335 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 5336 case ISD::SETUGT: 5337 case ISD::SETGT: 5338 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 5339 case ISD::SETOLE: 5340 case ISD::SETLE: 5341 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 5342 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 5343 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5344 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 5345 } 5346 5347 SDValue Cmp; 5348 switch (CC) { 5349 default: break; // SETUO etc aren't handled by fsel. 
5350 case ISD::SETNE: 5351 std::swap(TV, FV); 5352 case ISD::SETEQ: 5353 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5354 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5355 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5356 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5357 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 5358 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 5359 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5360 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 5361 case ISD::SETULT: 5362 case ISD::SETLT: 5363 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5364 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5365 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5366 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 5367 case ISD::SETOGE: 5368 case ISD::SETGE: 5369 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5370 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5371 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5372 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5373 case ISD::SETUGT: 5374 case ISD::SETGT: 5375 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 5376 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5377 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5378 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 5379 case ISD::SETOLE: 5380 case ISD::SETLE: 5381 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 5382 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5383 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5384 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5385 } 5386 return Op; 5387 } 5388 5389 // FIXME: Split this code up when LegalizeDAGTypes lands. 
5390 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 5391 SDLoc dl) const { 5392 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 5393 SDValue Src = Op.getOperand(0); 5394 if (Src.getValueType() == MVT::f32) 5395 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 5396 5397 SDValue Tmp; 5398 switch (Op.getSimpleValueType().SimpleTy) { 5399 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 5400 case MVT::i32: 5401 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : 5402 (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : 5403 PPCISD::FCTIDZ), 5404 dl, MVT::f64, Src); 5405 break; 5406 case MVT::i64: 5407 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 5408 "i64 FP_TO_UINT is supported only with FPCVT"); 5409 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 5410 PPCISD::FCTIDUZ, 5411 dl, MVT::f64, Src); 5412 break; 5413 } 5414 5415 // Convert the FP value to an int value through memory. 5416 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 5417 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 5418 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 5419 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 5420 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); 5421 5422 // Emit a store to the stack slot. 5423 SDValue Chain; 5424 if (i32Stack) { 5425 MachineFunction &MF = DAG.getMachineFunction(); 5426 MachineMemOperand *MMO = 5427 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 5428 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 5429 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 5430 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 5431 } else 5432 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 5433 MPI, false, false, 0); 5434 5435 // Result is a load from the stack slot. If loading 4 bytes, make sure to 5436 // add in a bias. 
5437 if (Op.getValueType() == MVT::i32 && !i32Stack) { 5438 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 5439 DAG.getConstant(4, FIPtr.getValueType())); 5440 MPI = MachinePointerInfo(); 5441 } 5442 5443 return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, 5444 false, false, false, 0); 5445 } 5446 5447 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 5448 SelectionDAG &DAG) const { 5449 SDLoc dl(Op); 5450 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 5451 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 5452 return SDValue(); 5453 5454 if (Op.getOperand(0).getValueType() == MVT::i1) 5455 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 5456 DAG.getConstantFP(1.0, Op.getValueType()), 5457 DAG.getConstantFP(0.0, Op.getValueType())); 5458 5459 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 5460 "UINT_TO_FP is supported only with FPCVT"); 5461 5462 // If we have FCFIDS, then use it when converting to single-precision. 5463 // Otherwise, convert to double-precision and then round. 5464 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 5465 (Op.getOpcode() == ISD::UINT_TO_FP ? 5466 PPCISD::FCFIDUS : PPCISD::FCFIDS) : 5467 (Op.getOpcode() == ISD::UINT_TO_FP ? 5468 PPCISD::FCFIDU : PPCISD::FCFID); 5469 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 5470 MVT::f32 : MVT::f64; 5471 5472 if (Op.getOperand(0).getValueType() == MVT::i64) { 5473 SDValue SINT = Op.getOperand(0); 5474 // When converting to single-precision, we actually need to convert 5475 // to double-precision first and then round to single-precision. 5476 // To avoid double-rounding effects during that operation, we have 5477 // to prepare the input operand. Bits that might be truncated when 5478 // converting to double-precision are replaced by a bit that won't 5479 // be lost at this stage, but is below the single-precision rounding 5480 // position. 
5481 // 5482 // However, if -enable-unsafe-fp-math is in effect, accept double 5483 // rounding to avoid the extra overhead. 5484 if (Op.getValueType() == MVT::f32 && 5485 !Subtarget.hasFPCVT() && 5486 !DAG.getTarget().Options.UnsafeFPMath) { 5487 5488 // Twiddle input to make sure the low 11 bits are zero. (If this 5489 // is the case, we are guaranteed the value will fit into the 53 bit 5490 // mantissa of an IEEE double-precision value without rounding.) 5491 // If any of those low 11 bits were not zero originally, make sure 5492 // bit 12 (value 2048) is set instead, so that the final rounding 5493 // to single-precision gets the correct result. 5494 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 5495 SINT, DAG.getConstant(2047, MVT::i64)); 5496 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 5497 Round, DAG.getConstant(2047, MVT::i64)); 5498 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 5499 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 5500 Round, DAG.getConstant(-2048, MVT::i64)); 5501 5502 // However, we cannot use that value unconditionally: if the magnitude 5503 // of the input value is small, the bit-twiddling we did above might 5504 // end up visibly changing the output. Fortunately, in that case, we 5505 // don't need to twiddle bits since the original input will convert 5506 // exactly to double-precision floating-point already. Therefore, 5507 // construct a conditional to use the original value if the top 11 5508 // bits are all sign-bit copies, and use the rounded value computed 5509 // above otherwise. 
5510 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 5511 SINT, DAG.getConstant(53, MVT::i32)); 5512 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 5513 Cond, DAG.getConstant(1, MVT::i64)); 5514 Cond = DAG.getSetCC(dl, MVT::i32, 5515 Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); 5516 5517 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 5518 } 5519 5520 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 5521 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 5522 5523 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 5524 FP = DAG.getNode(ISD::FP_ROUND, dl, 5525 MVT::f32, FP, DAG.getIntPtrConstant(0)); 5526 return FP; 5527 } 5528 5529 assert(Op.getOperand(0).getValueType() == MVT::i32 && 5530 "Unhandled INT_TO_FP type in custom expander!"); 5531 // Since we only generate this in 64-bit mode, we can take advantage of 5532 // 64-bit registers. In particular, sign extend the input value into the 5533 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 5534 // then lfd it and fcfid it. 5535 MachineFunction &MF = DAG.getMachineFunction(); 5536 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 5537 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5538 5539 SDValue Ld; 5540 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 5541 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 5542 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5543 5544 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 5545 MachinePointerInfo::getFixedStack(FrameIdx), 5546 false, false, 0); 5547 5548 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 5549 "Expected an i32 store"); 5550 MachineMemOperand *MMO = 5551 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), 5552 MachineMemOperand::MOLoad, 4, 4); 5553 SDValue Ops[] = { Store, FIdx }; 5554 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 
5555 PPCISD::LFIWZX : PPCISD::LFIWAX, 5556 dl, DAG.getVTList(MVT::f64, MVT::Other), 5557 Ops, MVT::i32, MMO); 5558 } else { 5559 assert(Subtarget.isPPC64() && 5560 "i32->FP without LFIWAX supported only on PPC64"); 5561 5562 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 5563 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5564 5565 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 5566 Op.getOperand(0)); 5567 5568 // STD the extended value into the stack slot. 5569 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, 5570 MachinePointerInfo::getFixedStack(FrameIdx), 5571 false, false, 0); 5572 5573 // Load the value as a double. 5574 Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, 5575 MachinePointerInfo::getFixedStack(FrameIdx), 5576 false, false, false, 0); 5577 } 5578 5579 // FCFID it and return it. 5580 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 5581 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 5582 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); 5583 return FP; 5584 } 5585 5586 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 5587 SelectionDAG &DAG) const { 5588 SDLoc dl(Op); 5589 /* 5590 The rounding mode is in bits 30:31 of FPSR, and has the following 5591 settings: 5592 00 Round to nearest 5593 01 Round to 0 5594 10 Round to +inf 5595 11 Round to -inf 5596 5597 FLT_ROUNDS, on the other hand, expects the following: 5598 -1 Undefined 5599 0 Round to 0 5600 1 Round to nearest 5601 2 Round to +inf 5602 3 Round to -inf 5603 5604 To perform the conversion, we do: 5605 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 5606 */ 5607 5608 MachineFunction &MF = DAG.getMachineFunction(); 5609 EVT VT = Op.getValueType(); 5610 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5611 5612 // Save FP Control Word to register 5613 EVT NodeTys[] = { 5614 MVT::f64, // return register 5615 MVT::Glue // unused in this context 5616 }; 5617 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, 
None); 5618 5619 // Save FP register to stack slot 5620 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); 5621 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 5622 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, 5623 StackSlot, MachinePointerInfo(), false, false,0); 5624 5625 // Load FP Control Word from low 32 bits of stack slot. 5626 SDValue Four = DAG.getConstant(4, PtrVT); 5627 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 5628 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), 5629 false, false, false, 0); 5630 5631 // Transform as necessary 5632 SDValue CWD1 = 5633 DAG.getNode(ISD::AND, dl, MVT::i32, 5634 CWD, DAG.getConstant(3, MVT::i32)); 5635 SDValue CWD2 = 5636 DAG.getNode(ISD::SRL, dl, MVT::i32, 5637 DAG.getNode(ISD::AND, dl, MVT::i32, 5638 DAG.getNode(ISD::XOR, dl, MVT::i32, 5639 CWD, DAG.getConstant(3, MVT::i32)), 5640 DAG.getConstant(3, MVT::i32)), 5641 DAG.getConstant(1, MVT::i32)); 5642 5643 SDValue RetVal = 5644 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 5645 5646 return DAG.getNode((VT.getSizeInBits() < 16 ? 5647 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 5648 } 5649 5650 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 5651 EVT VT = Op.getValueType(); 5652 unsigned BitWidth = VT.getSizeInBits(); 5653 SDLoc dl(Op); 5654 assert(Op.getNumOperands() == 3 && 5655 VT == Op.getOperand(1).getValueType() && 5656 "Unexpected SHL!"); 5657 5658 // Expand into a bunch of logical ops. Note that these ops 5659 // depend on the PPC behavior for oversized shift amounts. 
5660 SDValue Lo = Op.getOperand(0); 5661 SDValue Hi = Op.getOperand(1); 5662 SDValue Amt = Op.getOperand(2); 5663 EVT AmtVT = Amt.getValueType(); 5664 5665 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5666 DAG.getConstant(BitWidth, AmtVT), Amt); 5667 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 5668 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 5669 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 5670 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5671 DAG.getConstant(-BitWidth, AmtVT)); 5672 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 5673 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5674 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 5675 SDValue OutOps[] = { OutLo, OutHi }; 5676 return DAG.getMergeValues(OutOps, dl); 5677 } 5678 5679 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 5680 EVT VT = Op.getValueType(); 5681 SDLoc dl(Op); 5682 unsigned BitWidth = VT.getSizeInBits(); 5683 assert(Op.getNumOperands() == 3 && 5684 VT == Op.getOperand(1).getValueType() && 5685 "Unexpected SRL!"); 5686 5687 // Expand into a bunch of logical ops. Note that these ops 5688 // depend on the PPC behavior for oversized shift amounts. 
5689 SDValue Lo = Op.getOperand(0); 5690 SDValue Hi = Op.getOperand(1); 5691 SDValue Amt = Op.getOperand(2); 5692 EVT AmtVT = Amt.getValueType(); 5693 5694 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5695 DAG.getConstant(BitWidth, AmtVT), Amt); 5696 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5697 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5698 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5699 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5700 DAG.getConstant(-BitWidth, AmtVT)); 5701 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 5702 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5703 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 5704 SDValue OutOps[] = { OutLo, OutHi }; 5705 return DAG.getMergeValues(OutOps, dl); 5706 } 5707 5708 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 5709 SDLoc dl(Op); 5710 EVT VT = Op.getValueType(); 5711 unsigned BitWidth = VT.getSizeInBits(); 5712 assert(Op.getNumOperands() == 3 && 5713 VT == Op.getOperand(1).getValueType() && 5714 "Unexpected SRA!"); 5715 5716 // Expand into a bunch of logical ops, followed by a select_cc. 
5717 SDValue Lo = Op.getOperand(0); 5718 SDValue Hi = Op.getOperand(1); 5719 SDValue Amt = Op.getOperand(2); 5720 EVT AmtVT = Amt.getValueType(); 5721 5722 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5723 DAG.getConstant(BitWidth, AmtVT), Amt); 5724 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5725 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5726 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5727 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5728 DAG.getConstant(-BitWidth, AmtVT)); 5729 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 5730 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 5731 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), 5732 Tmp4, Tmp6, ISD::SETLE); 5733 SDValue OutOps[] = { OutLo, OutHi }; 5734 return DAG.getMergeValues(OutOps, dl); 5735 } 5736 5737 //===----------------------------------------------------------------------===// 5738 // Vector related lowering. 5739 // 5740 5741 /// BuildSplatI - Build a canonical splati of Val with an element size of 5742 /// SplatSize. Cast the result to VT. 5743 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 5744 SelectionDAG &DAG, SDLoc dl) { 5745 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 5746 5747 static const EVT VTys[] = { // canonical VT to use for each size. 5748 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 5749 }; 5750 5751 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 5752 5753 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 5754 if (Val == -1) 5755 SplatSize = 1; 5756 5757 EVT CanonicalVT = VTys[SplatSize-1]; 5758 5759 // Build a canonical splat for this value. 
5760 SDValue Elt = DAG.getConstant(Val, MVT::i32); 5761 SmallVector<SDValue, 8> Ops; 5762 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 5763 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops); 5764 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); 5765 } 5766 5767 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 5768 /// specified intrinsic ID. 5769 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, 5770 SelectionDAG &DAG, SDLoc dl, 5771 EVT DestVT = MVT::Other) { 5772 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 5773 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5774 DAG.getConstant(IID, MVT::i32), Op); 5775 } 5776 5777 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 5778 /// specified intrinsic ID. 5779 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 5780 SelectionDAG &DAG, SDLoc dl, 5781 EVT DestVT = MVT::Other) { 5782 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 5783 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5784 DAG.getConstant(IID, MVT::i32), LHS, RHS); 5785 } 5786 5787 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 5788 /// specified intrinsic ID. 5789 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 5790 SDValue Op2, SelectionDAG &DAG, 5791 SDLoc dl, EVT DestVT = MVT::Other) { 5792 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 5793 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5794 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 5795 } 5796 5797 5798 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 5799 /// amount. The result has the specified value type. 5800 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 5801 EVT VT, SelectionDAG &DAG, SDLoc dl) { 5802 // Force LHS/RHS to be the right type. 
5803 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 5804 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 5805 5806 int Ops[16]; 5807 for (unsigned i = 0; i != 16; ++i) 5808 Ops[i] = i + Amt; 5809 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 5810 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5811 } 5812 5813 // If this is a case we can't handle, return null and let the default 5814 // expansion code take care of it. If we CAN select this case, and if it 5815 // selects to a single instruction, return Op. Otherwise, if we can codegen 5816 // this case more efficiently than a constant pool load, lower it to the 5817 // sequence of ops that should be used. 5818 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 5819 SelectionDAG &DAG) const { 5820 SDLoc dl(Op); 5821 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 5822 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 5823 5824 // Check if this is a splat of a constant value. 5825 APInt APSplatBits, APSplatUndef; 5826 unsigned SplatBitSize; 5827 bool HasAnyUndefs; 5828 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 5829 HasAnyUndefs, 0, true) || SplatBitSize > 32) 5830 return SDValue(); 5831 5832 unsigned SplatBits = APSplatBits.getZExtValue(); 5833 unsigned SplatUndef = APSplatUndef.getZExtValue(); 5834 unsigned SplatSize = SplatBitSize / 8; 5835 5836 // First, handle single instruction cases. 5837 5838 // All zeros? 5839 if (SplatBits == 0) { 5840 // Canonicalize all zero vectors to be v4i32. 5841 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 5842 SDValue Z = DAG.getConstant(0, MVT::i32); 5843 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 5844 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 5845 } 5846 return Op; 5847 } 5848 5849 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 
5850 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 5851 (32-SplatBitSize)); 5852 if (SextVal >= -16 && SextVal <= 15) 5853 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 5854 5855 5856 // Two instruction sequences. 5857 5858 // If this value is in the range [-32,30] and is even, use: 5859 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 5860 // If this value is in the range [17,31] and is odd, use: 5861 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 5862 // If this value is in the range [-31,-17] and is odd, use: 5863 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 5864 // Note the last two are three-instruction sequences. 5865 if (SextVal >= -32 && SextVal <= 31) { 5866 // To avoid having these optimizations undone by constant folding, 5867 // we convert to a pseudo that will be expanded later into one of 5868 // the above forms. 5869 SDValue Elt = DAG.getConstant(SextVal, MVT::i32); 5870 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 5871 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); 5872 SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32); 5873 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 5874 if (VT == Op.getValueType()) 5875 return RetVal; 5876 else 5877 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 5878 } 5879 5880 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 5881 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 5882 // for fneg/fabs. 5883 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 5884 // Make -1 and vspltisw -1: 5885 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 5886 5887 // Make the VSLW intrinsic, computing 0x8000_0000. 5888 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 5889 OnesV, DAG, dl); 5890 5891 // xor by OnesV to invert it. 
5892 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 5893 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5894 } 5895 5896 // The remaining cases assume either big endian element order or 5897 // a splat-size that equates to the element size of the vector 5898 // to be built. An example that doesn't work for little endian is 5899 // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits 5900 // and a vector element size of 16 bits. The code below will 5901 // produce the vector in big endian element order, which for little 5902 // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. 5903 5904 // For now, just avoid these optimizations in that case. 5905 // FIXME: Develop correct optimizations for LE with mismatched 5906 // splat and element sizes. 5907 5908 if (Subtarget.isLittleEndian() && 5909 SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) 5910 return SDValue(); 5911 5912 // Check to see if this is a wide variety of vsplti*, binop self cases. 5913 static const signed char SplatCsts[] = { 5914 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 5915 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 5916 }; 5917 5918 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 5919 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 5920 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 5921 int i = SplatCsts[idx]; 5922 5923 // Figure out what shift amount will be used by altivec if shifted by i in 5924 // this splat size. 5925 unsigned TypeShiftAmt = i & (SplatBitSize-1); 5926 5927 // vsplti + shl self. 5928 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 5929 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5930 static const unsigned IIDs[] = { // Intrinsic to use for each size. 
5931 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 5932 Intrinsic::ppc_altivec_vslw 5933 }; 5934 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5935 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5936 } 5937 5938 // vsplti + srl self. 5939 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5940 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5941 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5942 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 5943 Intrinsic::ppc_altivec_vsrw 5944 }; 5945 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5946 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5947 } 5948 5949 // vsplti + sra self. 5950 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5951 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5952 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5953 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 5954 Intrinsic::ppc_altivec_vsraw 5955 }; 5956 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5957 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5958 } 5959 5960 // vsplti + rol self. 5961 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 5962 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 5963 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5964 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5965 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 5966 Intrinsic::ppc_altivec_vrlw 5967 }; 5968 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5969 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5970 } 5971 5972 // t = vsplti c, result = vsldoi t, t, 1 5973 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 
0xFF : 0))) { 5974 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5975 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 5976 } 5977 // t = vsplti c, result = vsldoi t, t, 2 5978 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 5979 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5980 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 5981 } 5982 // t = vsplti c, result = vsldoi t, t, 3 5983 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 5984 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5985 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 5986 } 5987 } 5988 5989 return SDValue(); 5990 } 5991 5992 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 5993 /// the specified operations to build the shuffle. 5994 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 5995 SDValue RHS, SelectionDAG &DAG, 5996 SDLoc dl) { 5997 unsigned OpNum = (PFEntry >> 26) & 0x0F; 5998 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 5999 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 6000 6001 enum { 6002 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 6003 OP_VMRGHW, 6004 OP_VMRGLW, 6005 OP_VSPLTISW0, 6006 OP_VSPLTISW1, 6007 OP_VSPLTISW2, 6008 OP_VSPLTISW3, 6009 OP_VSLDOI4, 6010 OP_VSLDOI8, 6011 OP_VSLDOI12 6012 }; 6013 6014 if (OpNum == OP_COPY) { 6015 if (LHSID == (1*9+2)*9+3) return LHS; 6016 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 6017 return RHS; 6018 } 6019 6020 SDValue OpLHS, OpRHS; 6021 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 6022 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 6023 6024 int ShufIdxs[16]; 6025 switch (OpNum) { 6026 default: llvm_unreachable("Unknown i32 permute!"); 6027 case OP_VMRGHW: 6028 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 6029 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 
17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 6030 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 6031 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 6032 break; 6033 case OP_VMRGLW: 6034 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 6035 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 6036 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 6037 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 6038 break; 6039 case OP_VSPLTISW0: 6040 for (unsigned i = 0; i != 16; ++i) 6041 ShufIdxs[i] = (i&3)+0; 6042 break; 6043 case OP_VSPLTISW1: 6044 for (unsigned i = 0; i != 16; ++i) 6045 ShufIdxs[i] = (i&3)+4; 6046 break; 6047 case OP_VSPLTISW2: 6048 for (unsigned i = 0; i != 16; ++i) 6049 ShufIdxs[i] = (i&3)+8; 6050 break; 6051 case OP_VSPLTISW3: 6052 for (unsigned i = 0; i != 16; ++i) 6053 ShufIdxs[i] = (i&3)+12; 6054 break; 6055 case OP_VSLDOI4: 6056 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 6057 case OP_VSLDOI8: 6058 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 6059 case OP_VSLDOI12: 6060 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 6061 } 6062 EVT VT = OpLHS.getValueType(); 6063 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 6064 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 6065 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 6066 return DAG.getNode(ISD::BITCAST, dl, VT, T); 6067 } 6068 6069 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 6070 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 6071 /// return the code it can be lowered into. Worst case, it can always be 6072 /// lowered into a vperm. 
6073 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 6074 SelectionDAG &DAG) const { 6075 SDLoc dl(Op); 6076 SDValue V1 = Op.getOperand(0); 6077 SDValue V2 = Op.getOperand(1); 6078 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 6079 EVT VT = Op.getValueType(); 6080 bool isLittleEndian = Subtarget.isLittleEndian(); 6081 6082 // Cases that are handled by instructions that take permute immediates 6083 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 6084 // selected by the instruction selector. 6085 if (V2.getOpcode() == ISD::UNDEF) { 6086 if (PPC::isSplatShuffleMask(SVOp, 1) || 6087 PPC::isSplatShuffleMask(SVOp, 2) || 6088 PPC::isSplatShuffleMask(SVOp, 4) || 6089 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 6090 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 6091 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 6092 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 6093 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 6094 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 6095 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 6096 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 6097 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { 6098 return Op; 6099 } 6100 } 6101 6102 // Altivec has a variety of "shuffle immediates" that take two vector inputs 6103 // and produce a fixed permutation. If any of these match, do not lower to 6104 // VPERM. 6105 unsigned int ShuffleKind = isLittleEndian ? 
2 : 0; 6106 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 6107 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 6108 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 6109 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 6110 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 6111 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 6112 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 6113 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 6114 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) 6115 return Op; 6116 6117 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 6118 // perfect shuffle table to emit an optimal matching sequence. 6119 ArrayRef<int> PermMask = SVOp->getMask(); 6120 6121 unsigned PFIndexes[4]; 6122 bool isFourElementShuffle = true; 6123 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 6124 unsigned EltNo = 8; // Start out undef. 6125 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 6126 if (PermMask[i*4+j] < 0) 6127 continue; // Undef, ignore it. 6128 6129 unsigned ByteSource = PermMask[i*4+j]; 6130 if ((ByteSource & 3) != j) { 6131 isFourElementShuffle = false; 6132 break; 6133 } 6134 6135 if (EltNo == 8) { 6136 EltNo = ByteSource/4; 6137 } else if (EltNo != ByteSource/4) { 6138 isFourElementShuffle = false; 6139 break; 6140 } 6141 } 6142 PFIndexes[i] = EltNo; 6143 } 6144 6145 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 6146 // perfect shuffle vector to determine if it is cost effective to do this as 6147 // discrete instructions, or whether we should use a vperm. 6148 // For now, we skip this for little endian until such time as we have a 6149 // little-endian perfect shuffle table. 6150 if (isFourElementShuffle && !isLittleEndian) { 6151 // Compute the index in the perfect shuffle table. 
6152 unsigned PFTableIndex = 6153 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 6154 6155 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 6156 unsigned Cost = (PFEntry >> 30); 6157 6158 // Determining when to avoid vperm is tricky. Many things affect the cost 6159 // of vperm, particularly how many times the perm mask needs to be computed. 6160 // For example, if the perm mask can be hoisted out of a loop or is already 6161 // used (perhaps because there are multiple permutes with the same shuffle 6162 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 6163 // the loop requires an extra register. 6164 // 6165 // As a compromise, we only emit discrete instructions if the shuffle can be 6166 // generated in 3 or fewer operations. When we have loop information 6167 // available, if this block is within a loop, we should avoid using vperm 6168 // for 3-operation perms and use a constant pool load instead. 6169 if (Cost < 3) 6170 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 6171 } 6172 6173 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 6174 // vector that will get spilled to the constant pool. 6175 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 6176 6177 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 6178 // that it is in input element units, not in bytes. Convert now. 6179 6180 // For little endian, the order of the input vectors is reversed, and 6181 // the permutation mask is complemented with respect to 31. This is 6182 // necessary to produce proper semantics with the big-endian-biased vperm 6183 // instruction. 6184 EVT EltVT = V1.getValueType().getVectorElementType(); 6185 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 6186 6187 SmallVector<SDValue, 16> ResultMask; 6188 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 6189 unsigned SrcElt = PermMask[i] < 0 ? 
0 : PermMask[i]; 6190 6191 for (unsigned j = 0; j != BytesPerElement; ++j) 6192 if (isLittleEndian) 6193 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j), 6194 MVT::i32)); 6195 else 6196 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 6197 MVT::i32)); 6198 } 6199 6200 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 6201 ResultMask); 6202 if (isLittleEndian) 6203 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 6204 V2, V1, VPermMask); 6205 else 6206 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 6207 V1, V2, VPermMask); 6208 } 6209 6210 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 6211 /// altivec comparison. If it is, return true and fill in Opc/isDot with 6212 /// information about the intrinsic. 6213 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 6214 bool &isDot) { 6215 unsigned IntrinsicID = 6216 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 6217 CompareOpc = -1; 6218 isDot = false; 6219 switch (IntrinsicID) { 6220 default: return false; 6221 // Comparison predicates. 
6222 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 6223 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 6224 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 6225 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 6226 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 6227 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 6228 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 6229 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 6230 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 6231 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 6232 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 6233 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 6234 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 6235 6236 // Normal Comparisons. 
6237 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 6238 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 6239 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 6240 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 6241 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 6242 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 6243 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 6244 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 6245 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 6246 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 6247 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 6248 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 6249 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 6250 } 6251 return true; 6252 } 6253 6254 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 6255 /// lower, do it, otherwise return null. 6256 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 6257 SelectionDAG &DAG) const { 6258 // If this is a lowered altivec predicate compare, CompareOpc is set to the 6259 // opcode number of the comparison. 6260 SDLoc dl(Op); 6261 int CompareOpc; 6262 bool isDot; 6263 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 6264 return SDValue(); // Don't custom lower most intrinsics. 6265 6266 // If this is a non-dot comparison, make the VCMP node and we are done. 6267 if (!isDot) { 6268 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 6269 Op.getOperand(1), Op.getOperand(2), 6270 DAG.getConstant(CompareOpc, MVT::i32)); 6271 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 6272 } 6273 6274 // Create the PPCISD altivec 'dot' comparison node. 
6275 SDValue Ops[] = { 6276 Op.getOperand(2), // LHS 6277 Op.getOperand(3), // RHS 6278 DAG.getConstant(CompareOpc, MVT::i32) 6279 }; 6280 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 6281 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 6282 6283 // Now that we have the comparison, emit a copy from the CR to a GPR. 6284 // This is flagged to the above dot comparison. 6285 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 6286 DAG.getRegister(PPC::CR6, MVT::i32), 6287 CompNode.getValue(1)); 6288 6289 // Unpack the result based on how the target uses it. 6290 unsigned BitNo; // Bit # of CR6. 6291 bool InvertBit; // Invert result? 6292 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 6293 default: // Can't happen, don't crash on invalid number though. 6294 case 0: // Return the value of the EQ bit of CR6. 6295 BitNo = 0; InvertBit = false; 6296 break; 6297 case 1: // Return the inverted value of the EQ bit of CR6. 6298 BitNo = 0; InvertBit = true; 6299 break; 6300 case 2: // Return the value of the LT bit of CR6. 6301 BitNo = 2; InvertBit = false; 6302 break; 6303 case 3: // Return the inverted value of the LT bit of CR6. 6304 BitNo = 2; InvertBit = true; 6305 break; 6306 } 6307 6308 // Shift the bit into the low position. 6309 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 6310 DAG.getConstant(8-(3-BitNo), MVT::i32)); 6311 // Isolate the bit. 6312 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 6313 DAG.getConstant(1, MVT::i32)); 6314 6315 // If we are supposed to, toggle the bit. 
6316 if (InvertBit) 6317 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 6318 DAG.getConstant(1, MVT::i32)); 6319 return Flags; 6320 } 6321 6322 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 6323 SelectionDAG &DAG) const { 6324 SDLoc dl(Op); 6325 // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int 6326 // instructions), but for smaller types, we need to first extend up to v2i32 6327 // before doing going farther. 6328 if (Op.getValueType() == MVT::v2i64) { 6329 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 6330 if (ExtVT != MVT::v2i32) { 6331 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); 6332 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, 6333 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), 6334 ExtVT.getVectorElementType(), 4))); 6335 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); 6336 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, 6337 DAG.getValueType(MVT::v2i32)); 6338 } 6339 6340 return Op; 6341 } 6342 6343 return SDValue(); 6344 } 6345 6346 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 6347 SelectionDAG &DAG) const { 6348 SDLoc dl(Op); 6349 // Create a stack slot that is 16-byte aligned. 6350 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 6351 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 6352 EVT PtrVT = getPointerTy(); 6353 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 6354 6355 // Store the input value into Value#0 of the stack slot. 6356 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 6357 Op.getOperand(0), FIdx, MachinePointerInfo(), 6358 false, false, 0); 6359 // Load it out. 
6360 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 6361 false, false, false, 0); 6362 } 6363 6364 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 6365 SDLoc dl(Op); 6366 if (Op.getValueType() == MVT::v4i32) { 6367 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6368 6369 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 6370 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 6371 6372 SDValue RHSSwap = // = vrlw RHS, 16 6373 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 6374 6375 // Shrinkify inputs to v8i16. 6376 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 6377 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 6378 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 6379 6380 // Low parts multiplied together, generating 32-bit results (we ignore the 6381 // top parts). 6382 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 6383 LHS, RHS, DAG, dl, MVT::v4i32); 6384 6385 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 6386 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 6387 // Shift the high parts up 16 bits. 6388 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 6389 Neg16, DAG, dl); 6390 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 6391 } else if (Op.getValueType() == MVT::v8i16) { 6392 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6393 6394 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 6395 6396 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 6397 LHS, RHS, Zero, DAG, dl); 6398 } else if (Op.getValueType() == MVT::v16i8) { 6399 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6400 bool isLittleEndian = Subtarget.isLittleEndian(); 6401 6402 // Multiply the even 8-bit parts, producing 16-bit sums. 
6403 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 6404 LHS, RHS, DAG, dl, MVT::v8i16); 6405 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 6406 6407 // Multiply the odd 8-bit parts, producing 16-bit sums. 6408 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 6409 LHS, RHS, DAG, dl, MVT::v8i16); 6410 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 6411 6412 // Merge the results together. Because vmuleub and vmuloub are 6413 // instructions with a big-endian bias, we must reverse the 6414 // element numbering and reverse the meaning of "odd" and "even" 6415 // when generating little endian code. 6416 int Ops[16]; 6417 for (unsigned i = 0; i != 8; ++i) { 6418 if (isLittleEndian) { 6419 Ops[i*2 ] = 2*i; 6420 Ops[i*2+1] = 2*i+16; 6421 } else { 6422 Ops[i*2 ] = 2*i+1; 6423 Ops[i*2+1] = 2*i+1+16; 6424 } 6425 } 6426 if (isLittleEndian) 6427 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); 6428 else 6429 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 6430 } else { 6431 llvm_unreachable("Unknown mul to lower!"); 6432 } 6433 } 6434 6435 /// LowerOperation - Provide custom lowering hooks for some operations. 
6436 /// 6437 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 6438 switch (Op.getOpcode()) { 6439 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 6440 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 6441 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 6442 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 6443 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 6444 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 6445 case ISD::SETCC: return LowerSETCC(Op, DAG); 6446 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 6447 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 6448 case ISD::VASTART: 6449 return LowerVASTART(Op, DAG, Subtarget); 6450 6451 case ISD::VAARG: 6452 return LowerVAARG(Op, DAG, Subtarget); 6453 6454 case ISD::VACOPY: 6455 return LowerVACOPY(Op, DAG, Subtarget); 6456 6457 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); 6458 case ISD::DYNAMIC_STACKALLOC: 6459 return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); 6460 6461 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 6462 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 6463 6464 case ISD::LOAD: return LowerLOAD(Op, DAG); 6465 case ISD::STORE: return LowerSTORE(Op, DAG); 6466 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); 6467 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 6468 case ISD::FP_TO_UINT: 6469 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 6470 SDLoc(Op)); 6471 case ISD::UINT_TO_FP: 6472 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 6473 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 6474 6475 // Lower 64-bit shifts. 6476 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 6477 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 6478 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 6479 6480 // Vector-related lowering. 
6481 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 6482 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 6483 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 6484 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 6485 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 6486 case ISD::MUL: return LowerMUL(Op, DAG); 6487 6488 // For counter-based loop handling. 6489 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 6490 6491 // Frame & Return address. 6492 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 6493 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 6494 } 6495 } 6496 6497 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 6498 SmallVectorImpl<SDValue>&Results, 6499 SelectionDAG &DAG) const { 6500 const TargetMachine &TM = getTargetMachine(); 6501 SDLoc dl(N); 6502 switch (N->getOpcode()) { 6503 default: 6504 llvm_unreachable("Do not know how to custom type legalize this operation!"); 6505 case ISD::INTRINSIC_W_CHAIN: { 6506 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 6507 Intrinsic::ppc_is_decremented_ctr_nonzero) 6508 break; 6509 6510 assert(N->getValueType(0) == MVT::i1 && 6511 "Unexpected result type for CTR decrement intrinsic"); 6512 EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); 6513 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 6514 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 6515 N->getOperand(1)); 6516 6517 Results.push_back(NewInt); 6518 Results.push_back(NewInt.getValue(1)); 6519 break; 6520 } 6521 case ISD::VAARG: { 6522 if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() 6523 || TM.getSubtarget<PPCSubtarget>().isPPC64()) 6524 return; 6525 6526 EVT VT = N->getValueType(0); 6527 6528 if (VT == MVT::i64) { 6529 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget); 6530 6531 Results.push_back(NewNode); 6532 Results.push_back(NewNode.getValue(1)); 6533 } 6534 return; 6535 } 6536 case 
ISD::FP_ROUND_INREG: { 6537 assert(N->getValueType(0) == MVT::ppcf128); 6538 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 6539 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 6540 MVT::f64, N->getOperand(0), 6541 DAG.getIntPtrConstant(0)); 6542 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 6543 MVT::f64, N->getOperand(0), 6544 DAG.getIntPtrConstant(1)); 6545 6546 // Add the two halves of the long double in round-to-zero mode. 6547 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 6548 6549 // We know the low half is about to be thrown away, so just use something 6550 // convenient. 6551 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 6552 FPreg, FPreg)); 6553 return; 6554 } 6555 case ISD::FP_TO_SINT: 6556 // LowerFP_TO_INT() can only handle f32 and f64. 6557 if (N->getOperand(0).getValueType() == MVT::ppcf128) 6558 return; 6559 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 6560 return; 6561 } 6562 } 6563 6564 6565 //===----------------------------------------------------------------------===// 6566 // Other Lowering Code 6567 //===----------------------------------------------------------------------===// 6568 6569 MachineBasicBlock * 6570 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 6571 bool is64bit, unsigned BinOpcode) const { 6572 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
6573 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6574 6575 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6576 MachineFunction *F = BB->getParent(); 6577 MachineFunction::iterator It = BB; 6578 ++It; 6579 6580 unsigned dest = MI->getOperand(0).getReg(); 6581 unsigned ptrA = MI->getOperand(1).getReg(); 6582 unsigned ptrB = MI->getOperand(2).getReg(); 6583 unsigned incr = MI->getOperand(3).getReg(); 6584 DebugLoc dl = MI->getDebugLoc(); 6585 6586 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 6587 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6588 F->insert(It, loopMBB); 6589 F->insert(It, exitMBB); 6590 exitMBB->splice(exitMBB->begin(), BB, 6591 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6592 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6593 6594 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6595 unsigned TmpReg = (!BinOpcode) ? incr : 6596 RegInfo.createVirtualRegister( 6597 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 6598 (const TargetRegisterClass *) &PPC::GPRCRegClass); 6599 6600 // thisMBB: 6601 // ... 6602 // fallthrough --> loopMBB 6603 BB->addSuccessor(loopMBB); 6604 6605 // loopMBB: 6606 // l[wd]arx dest, ptr 6607 // add r0, dest, incr 6608 // st[wd]cx. r0, ptr 6609 // bne- loopMBB 6610 // fallthrough --> exitMBB 6611 BB = loopMBB; 6612 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 6613 .addReg(ptrA).addReg(ptrB); 6614 if (BinOpcode) 6615 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 6616 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 6617 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 6618 BuildMI(BB, dl, TII->get(PPC::BCC)) 6619 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 6620 BB->addSuccessor(loopMBB); 6621 BB->addSuccessor(exitMBB); 6622 6623 // exitMBB: 6624 // ... 
6625 BB = exitMBB; 6626 return BB; 6627 } 6628 6629 MachineBasicBlock * 6630 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 6631 MachineBasicBlock *BB, 6632 bool is8bit, // operation 6633 unsigned BinOpcode) const { 6634 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 6635 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6636 // In 64 bit mode we have to use 64 bits for addresses, even though the 6637 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 6638 // registers without caring whether they're 32 or 64, but here we're 6639 // doing actual arithmetic on the addresses. 6640 bool is64bit = Subtarget.isPPC64(); 6641 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 6642 6643 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6644 MachineFunction *F = BB->getParent(); 6645 MachineFunction::iterator It = BB; 6646 ++It; 6647 6648 unsigned dest = MI->getOperand(0).getReg(); 6649 unsigned ptrA = MI->getOperand(1).getReg(); 6650 unsigned ptrB = MI->getOperand(2).getReg(); 6651 unsigned incr = MI->getOperand(3).getReg(); 6652 DebugLoc dl = MI->getDebugLoc(); 6653 6654 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 6655 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6656 F->insert(It, loopMBB); 6657 F->insert(It, exitMBB); 6658 exitMBB->splice(exitMBB->begin(), BB, 6659 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6660 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6661 6662 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6663 const TargetRegisterClass *RC = 6664 is64bit ? 
(const TargetRegisterClass *) &PPC::G8RCRegClass : 6665 (const TargetRegisterClass *) &PPC::GPRCRegClass; 6666 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 6667 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 6668 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 6669 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 6670 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 6671 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 6672 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 6673 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 6674 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 6675 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 6676 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 6677 unsigned Ptr1Reg; 6678 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 6679 6680 // thisMBB: 6681 // ... 6682 // fallthrough --> loopMBB 6683 BB->addSuccessor(loopMBB); 6684 6685 // The 4-byte load must be aligned, while a char or short may be 6686 // anywhere in the word. Hence all this nasty bookkeeping code. 6687 // add ptr1, ptrA, ptrB [copy if ptrA==0] 6688 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 6689 // xori shift, shift1, 24 [16] 6690 // rlwinm ptr, ptr1, 0, 0, 29 6691 // slw incr2, incr, shift 6692 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 6693 // slw mask, mask2, shift 6694 // loopMBB: 6695 // lwarx tmpDest, ptr 6696 // add tmp, tmpDest, incr2 6697 // andc tmp2, tmpDest, mask 6698 // and tmp3, tmp, mask 6699 // or tmp4, tmp3, tmp2 6700 // stwcx. tmp4, ptr 6701 // bne- loopMBB 6702 // fallthrough --> exitMBB 6703 // srw dest, tmpDest, shift 6704 if (ptrA != ZeroReg) { 6705 Ptr1Reg = RegInfo.createVirtualRegister(RC); 6706 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::ADD8 : PPC::ADD4), Ptr1Reg) 6707 .addReg(ptrA).addReg(ptrB); 6708 } else { 6709 Ptr1Reg = ptrB; 6710 } 6711 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 6712 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 6713 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 6714 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 6715 if (is64bit) 6716 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 6717 .addReg(Ptr1Reg).addImm(0).addImm(61); 6718 else 6719 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 6720 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 6721 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 6722 .addReg(incr).addReg(ShiftReg); 6723 if (is8bit) 6724 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 6725 else { 6726 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 6727 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 6728 } 6729 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 6730 .addReg(Mask2Reg).addReg(ShiftReg); 6731 6732 BB = loopMBB; 6733 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 6734 .addReg(ZeroReg).addReg(PtrReg); 6735 if (BinOpcode) 6736 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 6737 .addReg(Incr2Reg).addReg(TmpDestReg); 6738 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 6739 .addReg(TmpDestReg).addReg(MaskReg); 6740 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 6741 .addReg(TmpReg).addReg(MaskReg); 6742 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 6743 .addReg(Tmp3Reg).addReg(Tmp2Reg); 6744 BuildMI(BB, dl, TII->get(PPC::STWCX)) 6745 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 6746 BuildMI(BB, dl, TII->get(PPC::BCC)) 6747 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 6748 BB->addSuccessor(loopMBB); 6749 BB->addSuccessor(exitMBB); 6750 6751 // exitMBB: 6752 // ... 
6753 BB = exitMBB; 6754 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 6755 .addReg(ShiftReg); 6756 return BB; 6757 } 6758 6759 llvm::MachineBasicBlock* 6760 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, 6761 MachineBasicBlock *MBB) const { 6762 DebugLoc DL = MI->getDebugLoc(); 6763 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6764 6765 MachineFunction *MF = MBB->getParent(); 6766 MachineRegisterInfo &MRI = MF->getRegInfo(); 6767 6768 const BasicBlock *BB = MBB->getBasicBlock(); 6769 MachineFunction::iterator I = MBB; 6770 ++I; 6771 6772 // Memory Reference 6773 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 6774 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 6775 6776 unsigned DstReg = MI->getOperand(0).getReg(); 6777 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 6778 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 6779 unsigned mainDstReg = MRI.createVirtualRegister(RC); 6780 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 6781 6782 MVT PVT = getPointerTy(); 6783 assert((PVT == MVT::i64 || PVT == MVT::i32) && 6784 "Invalid Pointer Size!"); 6785 // For v = setjmp(buf), we generate 6786 // 6787 // thisMBB: 6788 // SjLjSetup mainMBB 6789 // bl mainMBB 6790 // v_restore = 1 6791 // b sinkMBB 6792 // 6793 // mainMBB: 6794 // buf[LabelOffset] = LR 6795 // v_main = 0 6796 // 6797 // sinkMBB: 6798 // v = phi(main, restore) 6799 // 6800 6801 MachineBasicBlock *thisMBB = MBB; 6802 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 6803 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 6804 MF->insert(I, mainMBB); 6805 MF->insert(I, sinkMBB); 6806 6807 MachineInstrBuilder MIB; 6808 6809 // Transfer the remainder of BB and its successor edges to sinkMBB. 
6810 sinkMBB->splice(sinkMBB->begin(), MBB, 6811 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 6812 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 6813 6814 // Note that the structure of the jmp_buf used here is not compatible 6815 // with that used by libc, and is not designed to be. Specifically, it 6816 // stores only those 'reserved' registers that LLVM does not otherwise 6817 // understand how to spill. Also, by convention, by the time this 6818 // intrinsic is called, Clang has already stored the frame address in the 6819 // first slot of the buffer and stack address in the third. Following the 6820 // X86 target code, we'll store the jump address in the second slot. We also 6821 // need to save the TOC pointer (R2) to handle jumps between shared 6822 // libraries, and that will be stored in the fourth slot. The thread 6823 // identifier (R13) is not affected. 6824 6825 // thisMBB: 6826 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 6827 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 6828 const int64_t BPOffset = 4 * PVT.getStoreSize(); 6829 6830 // Prepare IP either in reg. 6831 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 6832 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 6833 unsigned BufReg = MI->getOperand(1).getReg(); 6834 6835 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { 6836 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 6837 .addReg(PPC::X2) 6838 .addImm(TOCOffset) 6839 .addReg(BufReg); 6840 MIB.setMemRefs(MMOBegin, MMOEnd); 6841 } 6842 6843 // Naked functions never have a base pointer, and so we use r1. For all 6844 // other functions, this decision must be delayed until during PEI. 6845 unsigned BaseReg; 6846 if (MF->getFunction()->getAttributes().hasAttribute( 6847 AttributeSet::FunctionIndex, Attribute::Naked)) 6848 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; 6849 else 6850 BaseReg = Subtarget.isPPC64() ? 
PPC::BP8 : PPC::BP; 6851 6852 MIB = BuildMI(*thisMBB, MI, DL, 6853 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) 6854 .addReg(BaseReg) 6855 .addImm(BPOffset) 6856 .addReg(BufReg); 6857 MIB.setMemRefs(MMOBegin, MMOEnd); 6858 6859 // Setup 6860 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 6861 const PPCRegisterInfo *TRI = 6862 static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo()); 6863 MIB.addRegMask(TRI->getNoPreservedMask()); 6864 6865 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 6866 6867 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 6868 .addMBB(mainMBB); 6869 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 6870 6871 thisMBB->addSuccessor(mainMBB, /* weight */ 0); 6872 thisMBB->addSuccessor(sinkMBB, /* weight */ 1); 6873 6874 // mainMBB: 6875 // mainDstReg = 0 6876 MIB = BuildMI(mainMBB, DL, 6877 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); 6878 6879 // Store IP 6880 if (Subtarget.isPPC64()) { 6881 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 6882 .addReg(LabelReg) 6883 .addImm(LabelOffset) 6884 .addReg(BufReg); 6885 } else { 6886 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 6887 .addReg(LabelReg) 6888 .addImm(LabelOffset) 6889 .addReg(BufReg); 6890 } 6891 6892 MIB.setMemRefs(MMOBegin, MMOEnd); 6893 6894 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 6895 mainMBB->addSuccessor(sinkMBB); 6896 6897 // sinkMBB: 6898 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 6899 TII->get(PPC::PHI), DstReg) 6900 .addReg(mainDstReg).addMBB(mainMBB) 6901 .addReg(restoreDstReg).addMBB(thisMBB); 6902 6903 MI->eraseFromParent(); 6904 return sinkMBB; 6905 } 6906 6907 MachineBasicBlock * 6908 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, 6909 MachineBasicBlock *MBB) const { 6910 DebugLoc DL = MI->getDebugLoc(); 6911 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6912 6913 MachineFunction *MF = 
MBB->getParent(); 6914 MachineRegisterInfo &MRI = MF->getRegInfo(); 6915 6916 // Memory Reference 6917 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 6918 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 6919 6920 MVT PVT = getPointerTy(); 6921 assert((PVT == MVT::i64 || PVT == MVT::i32) && 6922 "Invalid Pointer Size!"); 6923 6924 const TargetRegisterClass *RC = 6925 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 6926 unsigned Tmp = MRI.createVirtualRegister(RC); 6927 // Since FP is only updated here but NOT referenced, it's treated as GPR. 6928 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 6929 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 6930 unsigned BP = (PVT == MVT::i64) ? PPC::X30 : 6931 (Subtarget.isSVR4ABI() && 6932 MF->getTarget().getRelocationModel() == Reloc::PIC_ ? 6933 PPC::R29 : PPC::R30); 6934 6935 MachineInstrBuilder MIB; 6936 6937 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 6938 const int64_t SPOffset = 2 * PVT.getStoreSize(); 6939 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 6940 const int64_t BPOffset = 4 * PVT.getStoreSize(); 6941 6942 unsigned BufReg = MI->getOperand(0).getReg(); 6943 6944 // Reload FP (the jumped-to function may not have had a 6945 // frame pointer, and if so, then its r31 will be restored 6946 // as necessary). 
6947 if (PVT == MVT::i64) { 6948 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 6949 .addImm(0) 6950 .addReg(BufReg); 6951 } else { 6952 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 6953 .addImm(0) 6954 .addReg(BufReg); 6955 } 6956 MIB.setMemRefs(MMOBegin, MMOEnd); 6957 6958 // Reload IP 6959 if (PVT == MVT::i64) { 6960 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 6961 .addImm(LabelOffset) 6962 .addReg(BufReg); 6963 } else { 6964 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 6965 .addImm(LabelOffset) 6966 .addReg(BufReg); 6967 } 6968 MIB.setMemRefs(MMOBegin, MMOEnd); 6969 6970 // Reload SP 6971 if (PVT == MVT::i64) { 6972 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 6973 .addImm(SPOffset) 6974 .addReg(BufReg); 6975 } else { 6976 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 6977 .addImm(SPOffset) 6978 .addReg(BufReg); 6979 } 6980 MIB.setMemRefs(MMOBegin, MMOEnd); 6981 6982 // Reload BP 6983 if (PVT == MVT::i64) { 6984 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 6985 .addImm(BPOffset) 6986 .addReg(BufReg); 6987 } else { 6988 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 6989 .addImm(BPOffset) 6990 .addReg(BufReg); 6991 } 6992 MIB.setMemRefs(MMOBegin, MMOEnd); 6993 6994 // Reload TOC 6995 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 6996 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 6997 .addImm(TOCOffset) 6998 .addReg(BufReg); 6999 7000 MIB.setMemRefs(MMOBegin, MMOEnd); 7001 } 7002 7003 // Jump 7004 BuildMI(*MBB, MI, DL, 7005 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 7006 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? 
PPC::BCTR8 : PPC::BCTR)); 7007 7008 MI->eraseFromParent(); 7009 return MBB; 7010 } 7011 7012 MachineBasicBlock * 7013 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 7014 MachineBasicBlock *BB) const { 7015 if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || 7016 MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { 7017 return emitEHSjLjSetJmp(MI, BB); 7018 } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || 7019 MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { 7020 return emitEHSjLjLongJmp(MI, BB); 7021 } 7022 7023 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 7024 7025 // To "insert" these instructions we actually have to insert their 7026 // control-flow patterns. 7027 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7028 MachineFunction::iterator It = BB; 7029 ++It; 7030 7031 MachineFunction *F = BB->getParent(); 7032 7033 if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || 7034 MI->getOpcode() == PPC::SELECT_CC_I8 || 7035 MI->getOpcode() == PPC::SELECT_I4 || 7036 MI->getOpcode() == PPC::SELECT_I8)) { 7037 SmallVector<MachineOperand, 2> Cond; 7038 if (MI->getOpcode() == PPC::SELECT_CC_I4 || 7039 MI->getOpcode() == PPC::SELECT_CC_I8) 7040 Cond.push_back(MI->getOperand(4)); 7041 else 7042 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 7043 Cond.push_back(MI->getOperand(1)); 7044 7045 DebugLoc dl = MI->getDebugLoc(); 7046 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 7047 TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), 7048 Cond, MI->getOperand(2).getReg(), 7049 MI->getOperand(3).getReg()); 7050 } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || 7051 MI->getOpcode() == PPC::SELECT_CC_I8 || 7052 MI->getOpcode() == PPC::SELECT_CC_F4 || 7053 MI->getOpcode() == PPC::SELECT_CC_F8 || 7054 MI->getOpcode() == PPC::SELECT_CC_VRRC || 7055 MI->getOpcode() == PPC::SELECT_I4 || 7056 MI->getOpcode() == PPC::SELECT_I8 || 7057 MI->getOpcode() == PPC::SELECT_F4 || 7058 MI->getOpcode() == 
PPC::SELECT_F8 || 7059 MI->getOpcode() == PPC::SELECT_VRRC) { 7060 // The incoming instruction knows the destination vreg to set, the 7061 // condition code register to branch on, the true/false values to 7062 // select between, and a branch opcode to use. 7063 7064 // thisMBB: 7065 // ... 7066 // TrueVal = ... 7067 // cmpTY ccX, r1, r2 7068 // bCC copy1MBB 7069 // fallthrough --> copy0MBB 7070 MachineBasicBlock *thisMBB = BB; 7071 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 7072 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7073 DebugLoc dl = MI->getDebugLoc(); 7074 F->insert(It, copy0MBB); 7075 F->insert(It, sinkMBB); 7076 7077 // Transfer the remainder of BB and its successor edges to sinkMBB. 7078 sinkMBB->splice(sinkMBB->begin(), BB, 7079 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7080 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7081 7082 // Next, add the true and fallthrough blocks as its successors. 7083 BB->addSuccessor(copy0MBB); 7084 BB->addSuccessor(sinkMBB); 7085 7086 if (MI->getOpcode() == PPC::SELECT_I4 || 7087 MI->getOpcode() == PPC::SELECT_I8 || 7088 MI->getOpcode() == PPC::SELECT_F4 || 7089 MI->getOpcode() == PPC::SELECT_F8 || 7090 MI->getOpcode() == PPC::SELECT_VRRC) { 7091 BuildMI(BB, dl, TII->get(PPC::BC)) 7092 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 7093 } else { 7094 unsigned SelectPred = MI->getOperand(4).getImm(); 7095 BuildMI(BB, dl, TII->get(PPC::BCC)) 7096 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 7097 } 7098 7099 // copy0MBB: 7100 // %FalseValue = ... 7101 // # fallthrough to sinkMBB 7102 BB = copy0MBB; 7103 7104 // Update machine-CFG edges 7105 BB->addSuccessor(sinkMBB); 7106 7107 // sinkMBB: 7108 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 7109 // ... 
7110 BB = sinkMBB; 7111 BuildMI(*BB, BB->begin(), dl, 7112 TII->get(PPC::PHI), MI->getOperand(0).getReg()) 7113 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 7114 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 7115 } 7116 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 7117 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 7118 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 7119 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 7120 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 7121 BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); 7122 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 7123 BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); 7124 7125 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 7126 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 7127 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 7128 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 7129 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 7130 BB = EmitAtomicBinary(MI, BB, false, PPC::AND); 7131 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 7132 BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); 7133 7134 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 7135 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 7136 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 7137 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 7138 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 7139 BB = EmitAtomicBinary(MI, BB, false, PPC::OR); 7140 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 7141 BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); 7142 7143 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 7144 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 7145 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 7146 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 7147 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 7148 BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); 7149 else if 
(MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 7150 BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); 7151 7152 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 7153 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 7154 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 7155 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 7156 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 7157 BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); 7158 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 7159 BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); 7160 7161 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 7162 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 7163 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 7164 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 7165 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 7166 BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); 7167 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 7168 BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); 7169 7170 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 7171 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 7172 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 7173 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 7174 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 7175 BB = EmitAtomicBinary(MI, BB, false, 0); 7176 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 7177 BB = EmitAtomicBinary(MI, BB, true, 0); 7178 7179 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 7180 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { 7181 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 7182 7183 unsigned dest = MI->getOperand(0).getReg(); 7184 unsigned ptrA = MI->getOperand(1).getReg(); 7185 unsigned ptrB = MI->getOperand(2).getReg(); 7186 unsigned oldval = MI->getOperand(3).getReg(); 7187 unsigned newval = MI->getOperand(4).getReg(); 7188 DebugLoc dl = MI->getDebugLoc(); 7189 7190 MachineBasicBlock *loop1MBB = 
F->CreateMachineBasicBlock(LLVM_BB); 7191 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 7192 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 7193 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7194 F->insert(It, loop1MBB); 7195 F->insert(It, loop2MBB); 7196 F->insert(It, midMBB); 7197 F->insert(It, exitMBB); 7198 exitMBB->splice(exitMBB->begin(), BB, 7199 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7200 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7201 7202 // thisMBB: 7203 // ... 7204 // fallthrough --> loopMBB 7205 BB->addSuccessor(loop1MBB); 7206 7207 // loop1MBB: 7208 // l[wd]arx dest, ptr 7209 // cmp[wd] dest, oldval 7210 // bne- midMBB 7211 // loop2MBB: 7212 // st[wd]cx. newval, ptr 7213 // bne- loopMBB 7214 // b exitBB 7215 // midMBB: 7216 // st[wd]cx. dest, ptr 7217 // exitBB: 7218 BB = loop1MBB; 7219 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 7220 .addReg(ptrA).addReg(ptrB); 7221 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) 7222 .addReg(oldval).addReg(dest); 7223 BuildMI(BB, dl, TII->get(PPC::BCC)) 7224 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 7225 BB->addSuccessor(loop2MBB); 7226 BB->addSuccessor(midMBB); 7227 7228 BB = loop2MBB; 7229 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 7230 .addReg(newval).addReg(ptrA).addReg(ptrB); 7231 BuildMI(BB, dl, TII->get(PPC::BCC)) 7232 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 7233 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 7234 BB->addSuccessor(loop1MBB); 7235 BB->addSuccessor(exitMBB); 7236 7237 BB = midMBB; 7238 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 7239 .addReg(dest).addReg(ptrA).addReg(ptrB); 7240 BB->addSuccessor(exitMBB); 7241 7242 // exitMBB: 7243 // ... 
7244 BB = exitMBB; 7245 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 7246 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 7247 // We must use 64-bit registers for addresses when targeting 64-bit, 7248 // since we're actually doing arithmetic on them. Other registers 7249 // can be 32-bit. 7250 bool is64bit = Subtarget.isPPC64(); 7251 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 7252 7253 unsigned dest = MI->getOperand(0).getReg(); 7254 unsigned ptrA = MI->getOperand(1).getReg(); 7255 unsigned ptrB = MI->getOperand(2).getReg(); 7256 unsigned oldval = MI->getOperand(3).getReg(); 7257 unsigned newval = MI->getOperand(4).getReg(); 7258 DebugLoc dl = MI->getDebugLoc(); 7259 7260 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 7261 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 7262 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 7263 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7264 F->insert(It, loop1MBB); 7265 F->insert(It, loop2MBB); 7266 F->insert(It, midMBB); 7267 F->insert(It, exitMBB); 7268 exitMBB->splice(exitMBB->begin(), BB, 7269 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7270 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7271 7272 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7273 const TargetRegisterClass *RC = 7274 is64bit ? 
(const TargetRegisterClass *) &PPC::G8RCRegClass : 7275 (const TargetRegisterClass *) &PPC::GPRCRegClass; 7276 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 7277 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 7278 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 7279 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 7280 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 7281 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 7282 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 7283 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 7284 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 7285 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 7286 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 7287 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 7288 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 7289 unsigned Ptr1Reg; 7290 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 7291 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 7292 // thisMBB: 7293 // ... 7294 // fallthrough --> loopMBB 7295 BB->addSuccessor(loop1MBB); 7296 7297 // The 4-byte load must be aligned, while a char or short may be 7298 // anywhere in the word. Hence all this nasty bookkeeping code. 7299 // add ptr1, ptrA, ptrB [copy if ptrA==0] 7300 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 7301 // xori shift, shift1, 24 [16] 7302 // rlwinm ptr, ptr1, 0, 0, 29 7303 // slw newval2, newval, shift 7304 // slw oldval2, oldval,shift 7305 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 7306 // slw mask, mask2, shift 7307 // and newval3, newval2, mask 7308 // and oldval3, oldval2, mask 7309 // loop1MBB: 7310 // lwarx tmpDest, ptr 7311 // and tmp, tmpDest, mask 7312 // cmpw tmp, oldval3 7313 // bne- midMBB 7314 // loop2MBB: 7315 // andc tmp2, tmpDest, mask 7316 // or tmp4, tmp2, newval3 7317 // stwcx. tmp4, ptr 7318 // bne- loop1MBB 7319 // b exitBB 7320 // midMBB: 7321 // stwcx. 
tmpDest, ptr 7322 // exitBB: 7323 // srw dest, tmpDest, shift 7324 if (ptrA != ZeroReg) { 7325 Ptr1Reg = RegInfo.createVirtualRegister(RC); 7326 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 7327 .addReg(ptrA).addReg(ptrB); 7328 } else { 7329 Ptr1Reg = ptrB; 7330 } 7331 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 7332 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 7333 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 7334 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 7335 if (is64bit) 7336 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 7337 .addReg(Ptr1Reg).addImm(0).addImm(61); 7338 else 7339 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 7340 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 7341 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 7342 .addReg(newval).addReg(ShiftReg); 7343 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 7344 .addReg(oldval).addReg(ShiftReg); 7345 if (is8bit) 7346 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 7347 else { 7348 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 7349 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 7350 .addReg(Mask3Reg).addImm(65535); 7351 } 7352 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 7353 .addReg(Mask2Reg).addReg(ShiftReg); 7354 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 7355 .addReg(NewVal2Reg).addReg(MaskReg); 7356 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 7357 .addReg(OldVal2Reg).addReg(MaskReg); 7358 7359 BB = loop1MBB; 7360 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 7361 .addReg(ZeroReg).addReg(PtrReg); 7362 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 7363 .addReg(TmpDestReg).addReg(MaskReg); 7364 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 7365 .addReg(TmpReg).addReg(OldVal3Reg); 7366 BuildMI(BB, dl, TII->get(PPC::BCC)) 7367 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 7368 BB->addSuccessor(loop2MBB); 7369 BB->addSuccessor(midMBB); 7370 7371 BB = loop2MBB; 7372 BuildMI(BB, dl, 
TII->get(PPC::ANDC),Tmp2Reg) 7373 .addReg(TmpDestReg).addReg(MaskReg); 7374 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 7375 .addReg(Tmp2Reg).addReg(NewVal3Reg); 7376 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 7377 .addReg(ZeroReg).addReg(PtrReg); 7378 BuildMI(BB, dl, TII->get(PPC::BCC)) 7379 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 7380 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 7381 BB->addSuccessor(loop1MBB); 7382 BB->addSuccessor(exitMBB); 7383 7384 BB = midMBB; 7385 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 7386 .addReg(ZeroReg).addReg(PtrReg); 7387 BB->addSuccessor(exitMBB); 7388 7389 // exitMBB: 7390 // ... 7391 BB = exitMBB; 7392 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 7393 .addReg(ShiftReg); 7394 } else if (MI->getOpcode() == PPC::FADDrtz) { 7395 // This pseudo performs an FADD with rounding mode temporarily forced 7396 // to round-to-zero. We emit this via custom inserter since the FPSCR 7397 // is not modeled at the SelectionDAG level. 7398 unsigned Dest = MI->getOperand(0).getReg(); 7399 unsigned Src1 = MI->getOperand(1).getReg(); 7400 unsigned Src2 = MI->getOperand(2).getReg(); 7401 DebugLoc dl = MI->getDebugLoc(); 7402 7403 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7404 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 7405 7406 // Save FPSCR value. 7407 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 7408 7409 // Set rounding mode to round-to-zero. 7410 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 7411 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 7412 7413 // Perform addition. 7414 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 7415 7416 // Restore FPSCR value. 
7417 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); 7418 } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 7419 MI->getOpcode() == PPC::ANDIo_1_GT_BIT || 7420 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 7421 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { 7422 unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 7423 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? 7424 PPC::ANDIo8 : PPC::ANDIo; 7425 bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 7426 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); 7427 7428 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7429 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 7430 &PPC::GPRCRegClass : 7431 &PPC::G8RCRegClass); 7432 7433 DebugLoc dl = MI->getDebugLoc(); 7434 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 7435 .addReg(MI->getOperand(1).getReg()).addImm(1); 7436 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 7437 MI->getOperand(0).getReg()) 7438 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 7439 } else { 7440 llvm_unreachable("Unexpected instr type to insert"); 7441 } 7442 7443 MI->eraseFromParent(); // The pseudo instruction is gone now. 
7444 return BB; 7445 } 7446 7447 //===----------------------------------------------------------------------===// 7448 // Target Optimization Hooks 7449 //===----------------------------------------------------------------------===// 7450 7451 SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, 7452 DAGCombinerInfo &DCI) const { 7453 if (DCI.isAfterLegalizeVectorOps()) 7454 return SDValue(); 7455 7456 EVT VT = Op.getValueType(); 7457 7458 if ((VT == MVT::f32 && Subtarget.hasFRES()) || 7459 (VT == MVT::f64 && Subtarget.hasFRE()) || 7460 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 7461 (VT == MVT::v2f64 && Subtarget.hasVSX())) { 7462 7463 // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 7464 // For the reciprocal, we need to find the zero of the function: 7465 // F(X) = A X - 1 [which has a zero at X = 1/A] 7466 // => 7467 // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form 7468 // does not require additional intermediate precision] 7469 7470 // Convergence is quadratic, so we essentially double the number of digits 7471 // correct after every iteration. The minimum architected relative 7472 // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has 7473 // 23 digits and double has 52 digits. 7474 int Iterations = Subtarget.hasRecipPrec() ? 
1 : 3; 7475 if (VT.getScalarType() == MVT::f64) 7476 ++Iterations; 7477 7478 SelectionDAG &DAG = DCI.DAG; 7479 SDLoc dl(Op); 7480 7481 SDValue FPOne = 7482 DAG.getConstantFP(1.0, VT.getScalarType()); 7483 if (VT.isVector()) { 7484 assert(VT.getVectorNumElements() == 4 && 7485 "Unknown vector type"); 7486 FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, 7487 FPOne, FPOne, FPOne, FPOne); 7488 } 7489 7490 SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op); 7491 DCI.AddToWorklist(Est.getNode()); 7492 7493 // Newton iterations: Est = Est + Est (1 - Arg * Est) 7494 for (int i = 0; i < Iterations; ++i) { 7495 SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est); 7496 DCI.AddToWorklist(NewEst.getNode()); 7497 7498 NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst); 7499 DCI.AddToWorklist(NewEst.getNode()); 7500 7501 NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); 7502 DCI.AddToWorklist(NewEst.getNode()); 7503 7504 Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst); 7505 DCI.AddToWorklist(Est.getNode()); 7506 } 7507 7508 return Est; 7509 } 7510 7511 return SDValue(); 7512 } 7513 7514 SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, 7515 DAGCombinerInfo &DCI) const { 7516 if (DCI.isAfterLegalizeVectorOps()) 7517 return SDValue(); 7518 7519 EVT VT = Op.getValueType(); 7520 7521 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 7522 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 7523 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 7524 (VT == MVT::v2f64 && Subtarget.hasVSX())) { 7525 7526 // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 7527 // For the reciprocal sqrt, we need to find the zero of the function: 7528 // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 7529 // => 7530 // X_{i+1} = X_i (1.5 - A X_i^2 / 2) 7531 // As a result, we precompute A/2 prior to the iteration loop. 7532 7533 // Convergence is quadratic, so we essentially double the number of digits 7534 // correct after every iteration. 
The minimum architected relative 7535 // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has 7536 // 23 digits and double has 52 digits. 7537 int Iterations = Subtarget.hasRecipPrec() ? 1 : 3; 7538 if (VT.getScalarType() == MVT::f64) 7539 ++Iterations; 7540 7541 SelectionDAG &DAG = DCI.DAG; 7542 SDLoc dl(Op); 7543 7544 SDValue FPThreeHalves = 7545 DAG.getConstantFP(1.5, VT.getScalarType()); 7546 if (VT.isVector()) { 7547 assert(VT.getVectorNumElements() == 4 && 7548 "Unknown vector type"); 7549 FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, 7550 FPThreeHalves, FPThreeHalves, 7551 FPThreeHalves, FPThreeHalves); 7552 } 7553 7554 SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op); 7555 DCI.AddToWorklist(Est.getNode()); 7556 7557 // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that 7558 // this entire sequence requires only one FP constant. 7559 SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op); 7560 DCI.AddToWorklist(HalfArg.getNode()); 7561 7562 HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op); 7563 DCI.AddToWorklist(HalfArg.getNode()); 7564 7565 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) 7566 for (int i = 0; i < Iterations; ++i) { 7567 SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est); 7568 DCI.AddToWorklist(NewEst.getNode()); 7569 7570 NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst); 7571 DCI.AddToWorklist(NewEst.getNode()); 7572 7573 NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst); 7574 DCI.AddToWorklist(NewEst.getNode()); 7575 7576 Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); 7577 DCI.AddToWorklist(Est.getNode()); 7578 } 7579 7580 return Est; 7581 } 7582 7583 return SDValue(); 7584 } 7585 7586 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 7587 // not enforce equality of the chain operands. 
7588 static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, 7589 unsigned Bytes, int Dist, 7590 SelectionDAG &DAG) { 7591 EVT VT = LS->getMemoryVT(); 7592 if (VT.getSizeInBits() / 8 != Bytes) 7593 return false; 7594 7595 SDValue Loc = LS->getBasePtr(); 7596 SDValue BaseLoc = Base->getBasePtr(); 7597 if (Loc.getOpcode() == ISD::FrameIndex) { 7598 if (BaseLoc.getOpcode() != ISD::FrameIndex) 7599 return false; 7600 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 7601 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 7602 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 7603 int FS = MFI->getObjectSize(FI); 7604 int BFS = MFI->getObjectSize(BFI); 7605 if (FS != BFS || FS != (int)Bytes) return false; 7606 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 7607 } 7608 7609 // Handle X+C 7610 if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && 7611 cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) 7612 return true; 7613 7614 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7615 const GlobalValue *GV1 = nullptr; 7616 const GlobalValue *GV2 = nullptr; 7617 int64_t Offset1 = 0; 7618 int64_t Offset2 = 0; 7619 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 7620 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 7621 if (isGA1 && isGA2 && GV1 == GV2) 7622 return Offset1 == (Offset2 + Dist*Bytes); 7623 return false; 7624 } 7625 7626 // Return true is there is a nearyby consecutive load to the one provided 7627 // (regardless of alignment). We search up and down the chain, looking though 7628 // token factors and other loads (but nothing else). As a result, a true 7629 // results indicates that it is safe to create a new consecutive load adjacent 7630 // to the load provided. 
7631 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 7632 SDValue Chain = LD->getChain(); 7633 EVT VT = LD->getMemoryVT(); 7634 7635 SmallSet<SDNode *, 16> LoadRoots; 7636 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 7637 SmallSet<SDNode *, 16> Visited; 7638 7639 // First, search up the chain, branching to follow all token-factor operands. 7640 // If we find a consecutive load, then we're done, otherwise, record all 7641 // nodes just above the top-level loads and token factors. 7642 while (!Queue.empty()) { 7643 SDNode *ChainNext = Queue.pop_back_val(); 7644 if (!Visited.insert(ChainNext)) 7645 continue; 7646 7647 if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) { 7648 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 7649 return true; 7650 7651 if (!Visited.count(ChainLD->getChain().getNode())) 7652 Queue.push_back(ChainLD->getChain().getNode()); 7653 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 7654 for (const SDUse &O : ChainNext->ops()) 7655 if (!Visited.count(O.getNode())) 7656 Queue.push_back(O.getNode()); 7657 } else 7658 LoadRoots.insert(ChainNext); 7659 } 7660 7661 // Second, search down the chain, starting from the top-level nodes recorded 7662 // in the first phase. These top-level nodes are the nodes just above all 7663 // loads and token factors. Starting with their uses, recursively look though 7664 // all loads (just the chain uses) and token factors to find a consecutive 7665 // load. 
7666 Visited.clear(); 7667 Queue.clear(); 7668 7669 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 7670 IE = LoadRoots.end(); I != IE; ++I) { 7671 Queue.push_back(*I); 7672 7673 while (!Queue.empty()) { 7674 SDNode *LoadRoot = Queue.pop_back_val(); 7675 if (!Visited.insert(LoadRoot)) 7676 continue; 7677 7678 if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot)) 7679 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 7680 return true; 7681 7682 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 7683 UE = LoadRoot->use_end(); UI != UE; ++UI) 7684 if (((isa<LoadSDNode>(*UI) && 7685 cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) || 7686 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 7687 Queue.push_back(*UI); 7688 } 7689 } 7690 7691 return false; 7692 } 7693 7694 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, 7695 DAGCombinerInfo &DCI) const { 7696 SelectionDAG &DAG = DCI.DAG; 7697 SDLoc dl(N); 7698 7699 assert(Subtarget.useCRBits() && 7700 "Expecting to be tracking CR bits"); 7701 // If we're tracking CR bits, we need to be careful that we don't have: 7702 // trunc(binary-ops(zext(x), zext(y))) 7703 // or 7704 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) 7705 // such that we're unnecessarily moving things into GPRs when it would be 7706 // better to keep them in CR bits. 7707 7708 // Note that trunc here can be an actual i1 trunc, or can be the effective 7709 // truncation that comes from a setcc or select_cc. 7710 if (N->getOpcode() == ISD::TRUNCATE && 7711 N->getValueType(0) != MVT::i1) 7712 return SDValue(); 7713 7714 if (N->getOperand(0).getValueType() != MVT::i32 && 7715 N->getOperand(0).getValueType() != MVT::i64) 7716 return SDValue(); 7717 7718 if (N->getOpcode() == ISD::SETCC || 7719 N->getOpcode() == ISD::SELECT_CC) { 7720 // If we're looking at a comparison, then we need to make sure that the 7721 // high bits (all except for the first) don't matter the result. 
7722 ISD::CondCode CC = 7723 cast<CondCodeSDNode>(N->getOperand( 7724 N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); 7725 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); 7726 7727 if (ISD::isSignedIntSetCC(CC)) { 7728 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || 7729 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) 7730 return SDValue(); 7731 } else if (ISD::isUnsignedIntSetCC(CC)) { 7732 if (!DAG.MaskedValueIsZero(N->getOperand(0), 7733 APInt::getHighBitsSet(OpBits, OpBits-1)) || 7734 !DAG.MaskedValueIsZero(N->getOperand(1), 7735 APInt::getHighBitsSet(OpBits, OpBits-1))) 7736 return SDValue(); 7737 } else { 7738 // This is neither a signed nor an unsigned comparison, just make sure 7739 // that the high bits are equal. 7740 APInt Op1Zero, Op1One; 7741 APInt Op2Zero, Op2One; 7742 DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); 7743 DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); 7744 7745 // We don't really care about what is known about the first bit (if 7746 // anything), so clear it in all masks prior to comparing them. 7747 Op1Zero.clearBit(0); Op1One.clearBit(0); 7748 Op2Zero.clearBit(0); Op2One.clearBit(0); 7749 7750 if (Op1Zero != Op2Zero || Op1One != Op2One) 7751 return SDValue(); 7752 } 7753 } 7754 7755 // We now know that the higher-order bits are irrelevant, we just need to 7756 // make sure that all of the intermediate operations are bit operations, and 7757 // all inputs are extensions. 
7758 if (N->getOperand(0).getOpcode() != ISD::AND && 7759 N->getOperand(0).getOpcode() != ISD::OR && 7760 N->getOperand(0).getOpcode() != ISD::XOR && 7761 N->getOperand(0).getOpcode() != ISD::SELECT && 7762 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 7763 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 7764 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 7765 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 7766 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 7767 return SDValue(); 7768 7769 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 7770 N->getOperand(1).getOpcode() != ISD::AND && 7771 N->getOperand(1).getOpcode() != ISD::OR && 7772 N->getOperand(1).getOpcode() != ISD::XOR && 7773 N->getOperand(1).getOpcode() != ISD::SELECT && 7774 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 7775 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 7776 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 7777 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 7778 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 7779 return SDValue(); 7780 7781 SmallVector<SDValue, 4> Inputs; 7782 SmallVector<SDValue, 8> BinOps, PromOps; 7783 SmallPtrSet<SDNode *, 16> Visited; 7784 7785 for (unsigned i = 0; i < 2; ++i) { 7786 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 7787 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 7788 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 7789 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 7790 isa<ConstantSDNode>(N->getOperand(i))) 7791 Inputs.push_back(N->getOperand(i)); 7792 else 7793 BinOps.push_back(N->getOperand(i)); 7794 7795 if (N->getOpcode() == ISD::TRUNCATE) 7796 break; 7797 } 7798 7799 // Visit all inputs, collect all binary operations (and, or, xor and 7800 // select) that are all fed by extensions. 
7801 while (!BinOps.empty()) { 7802 SDValue BinOp = BinOps.back(); 7803 BinOps.pop_back(); 7804 7805 if (!Visited.insert(BinOp.getNode())) 7806 continue; 7807 7808 PromOps.push_back(BinOp); 7809 7810 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 7811 // The condition of the select is not promoted. 7812 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 7813 continue; 7814 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 7815 continue; 7816 7817 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 7818 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 7819 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 7820 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 7821 isa<ConstantSDNode>(BinOp.getOperand(i))) { 7822 Inputs.push_back(BinOp.getOperand(i)); 7823 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 7824 BinOp.getOperand(i).getOpcode() == ISD::OR || 7825 BinOp.getOperand(i).getOpcode() == ISD::XOR || 7826 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 7827 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 7828 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 7829 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 7830 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 7831 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 7832 BinOps.push_back(BinOp.getOperand(i)); 7833 } else { 7834 // We have an input that is not an extension or another binary 7835 // operation; we'll abort this transformation. 7836 return SDValue(); 7837 } 7838 } 7839 } 7840 7841 // Make sure that this is a self-contained cluster of operations (which 7842 // is not quite the same thing as saying that everything has only one 7843 // use). 
7844 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 7845 if (isa<ConstantSDNode>(Inputs[i])) 7846 continue; 7847 7848 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 7849 UE = Inputs[i].getNode()->use_end(); 7850 UI != UE; ++UI) { 7851 SDNode *User = *UI; 7852 if (User != N && !Visited.count(User)) 7853 return SDValue(); 7854 7855 // Make sure that we're not going to promote the non-output-value 7856 // operand(s) or SELECT or SELECT_CC. 7857 // FIXME: Although we could sometimes handle this, and it does occur in 7858 // practice that one of the condition inputs to the select is also one of 7859 // the outputs, we currently can't deal with this. 7860 if (User->getOpcode() == ISD::SELECT) { 7861 if (User->getOperand(0) == Inputs[i]) 7862 return SDValue(); 7863 } else if (User->getOpcode() == ISD::SELECT_CC) { 7864 if (User->getOperand(0) == Inputs[i] || 7865 User->getOperand(1) == Inputs[i]) 7866 return SDValue(); 7867 } 7868 } 7869 } 7870 7871 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 7872 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 7873 UE = PromOps[i].getNode()->use_end(); 7874 UI != UE; ++UI) { 7875 SDNode *User = *UI; 7876 if (User != N && !Visited.count(User)) 7877 return SDValue(); 7878 7879 // Make sure that we're not going to promote the non-output-value 7880 // operand(s) or SELECT or SELECT_CC. 7881 // FIXME: Although we could sometimes handle this, and it does occur in 7882 // practice that one of the condition inputs to the select is also one of 7883 // the outputs, we currently can't deal with this. 7884 if (User->getOpcode() == ISD::SELECT) { 7885 if (User->getOperand(0) == PromOps[i]) 7886 return SDValue(); 7887 } else if (User->getOpcode() == ISD::SELECT_CC) { 7888 if (User->getOperand(0) == PromOps[i] || 7889 User->getOperand(1) == PromOps[i]) 7890 return SDValue(); 7891 } 7892 } 7893 } 7894 7895 // Replace all inputs with the extension operand. 
7896 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 7897 // Constants may have users outside the cluster of to-be-promoted nodes, 7898 // and so we need to replace those as we do the promotions. 7899 if (isa<ConstantSDNode>(Inputs[i])) 7900 continue; 7901 else 7902 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 7903 } 7904 7905 // Replace all operations (these are all the same, but have a different 7906 // (i1) return type). DAG.getNode will validate that the types of 7907 // a binary operator match, so go through the list in reverse so that 7908 // we've likely promoted both operands first. Any intermediate truncations or 7909 // extensions disappear. 7910 while (!PromOps.empty()) { 7911 SDValue PromOp = PromOps.back(); 7912 PromOps.pop_back(); 7913 7914 if (PromOp.getOpcode() == ISD::TRUNCATE || 7915 PromOp.getOpcode() == ISD::SIGN_EXTEND || 7916 PromOp.getOpcode() == ISD::ZERO_EXTEND || 7917 PromOp.getOpcode() == ISD::ANY_EXTEND) { 7918 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 7919 PromOp.getOperand(0).getValueType() != MVT::i1) { 7920 // The operand is not yet ready (see comment below). 
7921 PromOps.insert(PromOps.begin(), PromOp); 7922 continue; 7923 } 7924 7925 SDValue RepValue = PromOp.getOperand(0); 7926 if (isa<ConstantSDNode>(RepValue)) 7927 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 7928 7929 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 7930 continue; 7931 } 7932 7933 unsigned C; 7934 switch (PromOp.getOpcode()) { 7935 default: C = 0; break; 7936 case ISD::SELECT: C = 1; break; 7937 case ISD::SELECT_CC: C = 2; break; 7938 } 7939 7940 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 7941 PromOp.getOperand(C).getValueType() != MVT::i1) || 7942 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 7943 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 7944 // The to-be-promoted operands of this node have not yet been 7945 // promoted (this should be rare because we're going through the 7946 // list backward, but if one of the operands has several users in 7947 // this cluster of to-be-promoted nodes, it is possible). 7948 PromOps.insert(PromOps.begin(), PromOp); 7949 continue; 7950 } 7951 7952 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 7953 PromOp.getNode()->op_end()); 7954 7955 // If there are any constant inputs, make sure they're replaced now. 7956 for (unsigned i = 0; i < 2; ++i) 7957 if (isa<ConstantSDNode>(Ops[C+i])) 7958 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 7959 7960 DAG.ReplaceAllUsesOfValueWith(PromOp, 7961 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 7962 } 7963 7964 // Now we're left with the initial truncation itself. 7965 if (N->getOpcode() == ISD::TRUNCATE) 7966 return N->getOperand(0); 7967 7968 // Otherwise, this is a comparison. The operands to be compared have just 7969 // changed type (to i1), but everything else is the same. 
7970 return SDValue(N, 0); 7971 } 7972 7973 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 7974 DAGCombinerInfo &DCI) const { 7975 SelectionDAG &DAG = DCI.DAG; 7976 SDLoc dl(N); 7977 7978 // If we're tracking CR bits, we need to be careful that we don't have: 7979 // zext(binary-ops(trunc(x), trunc(y))) 7980 // or 7981 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 7982 // such that we're unnecessarily moving things into CR bits that can more 7983 // efficiently stay in GPRs. Note that if we're not certain that the high 7984 // bits are set as required by the final extension, we still may need to do 7985 // some masking to get the proper behavior. 7986 7987 // This same functionality is important on PPC64 when dealing with 7988 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 7989 // the return values of functions. Because it is so similar, it is handled 7990 // here as well. 7991 7992 if (N->getValueType(0) != MVT::i32 && 7993 N->getValueType(0) != MVT::i64) 7994 return SDValue(); 7995 7996 if (!((N->getOperand(0).getValueType() == MVT::i1 && 7997 Subtarget.useCRBits()) || 7998 (N->getOperand(0).getValueType() == MVT::i32 && 7999 Subtarget.isPPC64()))) 8000 return SDValue(); 8001 8002 if (N->getOperand(0).getOpcode() != ISD::AND && 8003 N->getOperand(0).getOpcode() != ISD::OR && 8004 N->getOperand(0).getOpcode() != ISD::XOR && 8005 N->getOperand(0).getOpcode() != ISD::SELECT && 8006 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 8007 return SDValue(); 8008 8009 SmallVector<SDValue, 4> Inputs; 8010 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 8011 SmallPtrSet<SDNode *, 16> Visited; 8012 8013 // Visit all inputs, collect all binary operations (and, or, xor and 8014 // select) that are all fed by truncations. 
8015 while (!BinOps.empty()) { 8016 SDValue BinOp = BinOps.back(); 8017 BinOps.pop_back(); 8018 8019 if (!Visited.insert(BinOp.getNode())) 8020 continue; 8021 8022 PromOps.push_back(BinOp); 8023 8024 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 8025 // The condition of the select is not promoted. 8026 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 8027 continue; 8028 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 8029 continue; 8030 8031 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 8032 isa<ConstantSDNode>(BinOp.getOperand(i))) { 8033 Inputs.push_back(BinOp.getOperand(i)); 8034 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 8035 BinOp.getOperand(i).getOpcode() == ISD::OR || 8036 BinOp.getOperand(i).getOpcode() == ISD::XOR || 8037 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 8038 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 8039 BinOps.push_back(BinOp.getOperand(i)); 8040 } else { 8041 // We have an input that is not a truncation or another binary 8042 // operation; we'll abort this transformation. 8043 return SDValue(); 8044 } 8045 } 8046 } 8047 8048 // Make sure that this is a self-contained cluster of operations (which 8049 // is not quite the same thing as saying that everything has only one 8050 // use). 8051 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8052 if (isa<ConstantSDNode>(Inputs[i])) 8053 continue; 8054 8055 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 8056 UE = Inputs[i].getNode()->use_end(); 8057 UI != UE; ++UI) { 8058 SDNode *User = *UI; 8059 if (User != N && !Visited.count(User)) 8060 return SDValue(); 8061 8062 // Make sure that we're not going to promote the non-output-value 8063 // operand(s) or SELECT or SELECT_CC. 8064 // FIXME: Although we could sometimes handle this, and it does occur in 8065 // practice that one of the condition inputs to the select is also one of 8066 // the outputs, we currently can't deal with this. 
8067 if (User->getOpcode() == ISD::SELECT) { 8068 if (User->getOperand(0) == Inputs[i]) 8069 return SDValue(); 8070 } else if (User->getOpcode() == ISD::SELECT_CC) { 8071 if (User->getOperand(0) == Inputs[i] || 8072 User->getOperand(1) == Inputs[i]) 8073 return SDValue(); 8074 } 8075 } 8076 } 8077 8078 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 8079 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 8080 UE = PromOps[i].getNode()->use_end(); 8081 UI != UE; ++UI) { 8082 SDNode *User = *UI; 8083 if (User != N && !Visited.count(User)) 8084 return SDValue(); 8085 8086 // Make sure that we're not going to promote the non-output-value 8087 // operand(s) or SELECT or SELECT_CC. 8088 // FIXME: Although we could sometimes handle this, and it does occur in 8089 // practice that one of the condition inputs to the select is also one of 8090 // the outputs, we currently can't deal with this. 8091 if (User->getOpcode() == ISD::SELECT) { 8092 if (User->getOperand(0) == PromOps[i]) 8093 return SDValue(); 8094 } else if (User->getOpcode() == ISD::SELECT_CC) { 8095 if (User->getOperand(0) == PromOps[i] || 8096 User->getOperand(1) == PromOps[i]) 8097 return SDValue(); 8098 } 8099 } 8100 } 8101 8102 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 8103 bool ReallyNeedsExt = false; 8104 if (N->getOpcode() != ISD::ANY_EXTEND) { 8105 // If all of the inputs are not already sign/zero extended, then 8106 // we'll still need to do that at the end. 
8107 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8108 if (isa<ConstantSDNode>(Inputs[i])) 8109 continue; 8110 8111 unsigned OpBits = 8112 Inputs[i].getOperand(0).getValueSizeInBits(); 8113 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 8114 8115 if ((N->getOpcode() == ISD::ZERO_EXTEND && 8116 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 8117 APInt::getHighBitsSet(OpBits, 8118 OpBits-PromBits))) || 8119 (N->getOpcode() == ISD::SIGN_EXTEND && 8120 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 8121 (OpBits-(PromBits-1)))) { 8122 ReallyNeedsExt = true; 8123 break; 8124 } 8125 } 8126 } 8127 8128 // Replace all inputs, either with the truncation operand, or a 8129 // truncation or extension to the final output type. 8130 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8131 // Constant inputs need to be replaced with the to-be-promoted nodes that 8132 // use them because they might have users outside of the cluster of 8133 // promoted nodes. 8134 if (isa<ConstantSDNode>(Inputs[i])) 8135 continue; 8136 8137 SDValue InSrc = Inputs[i].getOperand(0); 8138 if (Inputs[i].getValueType() == N->getValueType(0)) 8139 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 8140 else if (N->getOpcode() == ISD::SIGN_EXTEND) 8141 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8142 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 8143 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8144 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8145 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 8146 else 8147 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8148 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 8149 } 8150 8151 // Replace all operations (these are all the same, but have a different 8152 // (promoted) return type). DAG.getNode will validate that the types of 8153 // a binary operator match, so go through the list in reverse so that 8154 // we've likely promoted both operands first. 
8155 while (!PromOps.empty()) { 8156 SDValue PromOp = PromOps.back(); 8157 PromOps.pop_back(); 8158 8159 unsigned C; 8160 switch (PromOp.getOpcode()) { 8161 default: C = 0; break; 8162 case ISD::SELECT: C = 1; break; 8163 case ISD::SELECT_CC: C = 2; break; 8164 } 8165 8166 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 8167 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 8168 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 8169 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 8170 // The to-be-promoted operands of this node have not yet been 8171 // promoted (this should be rare because we're going through the 8172 // list backward, but if one of the operands has several users in 8173 // this cluster of to-be-promoted nodes, it is possible). 8174 PromOps.insert(PromOps.begin(), PromOp); 8175 continue; 8176 } 8177 8178 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 8179 PromOp.getNode()->op_end()); 8180 8181 // If this node has constant inputs, then they'll need to be promoted here. 8182 for (unsigned i = 0; i < 2; ++i) { 8183 if (!isa<ConstantSDNode>(Ops[C+i])) 8184 continue; 8185 if (Ops[C+i].getValueType() == N->getValueType(0)) 8186 continue; 8187 8188 if (N->getOpcode() == ISD::SIGN_EXTEND) 8189 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8190 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8191 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8192 else 8193 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8194 } 8195 8196 DAG.ReplaceAllUsesOfValueWith(PromOp, 8197 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 8198 } 8199 8200 // Now we're left with the initial extension itself. 8201 if (!ReallyNeedsExt) 8202 return N->getOperand(0); 8203 8204 // To zero extend, just mask off everything except for the first bit (in the 8205 // i1 case). 
// Zero extension: AND the promoted value with a mask of its low PromBits bits.
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       N->getValueType(0)));

  // Sign extension: shift left by (width - PromBits) and then arithmetic-shift
  // right by the same amount, using a single shared shift-amount constant.
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
  SDValue ShiftCst =
    DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
  return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
                     DAG.getNode(ISD::SHL, dl, N->getValueType(0),
                                 N->getOperand(0), ShiftCst), ShiftCst);
}

/// PerformDAGCombine - PPC-specific DAG combines, dispatched on the opcode of
/// \p N.
///
/// Returns the replacement value when a combine applies. The LOAD and BSWAP
/// cases rewrite users in place and return SDValue(N, 0) instead. A null
/// SDValue is returned when no combine was performed.
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  const TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  // Shifts whose shifted operand (operand 0) is a constant 0 (or, for SRA,
  // all-ones) fold to that constant regardless of the shift amount.
  case PPCISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())    // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  // Extensions and truncation-like nodes are delegated to dedicated helpers.
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
  case ISD::SETCC:
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::FDIV: {
    assert(TM.Options.UnsafeFPMath &&
           "Reciprocal estimates require UnsafeFPMath");

    // A / sqrt(X): multiply A by the value produced by
    // DAGCombineFastRecipFSQRT(X) when that helper returns one.
    if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
      SDValue RV =
        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
      if (RV.getNode()) {
        DCI.AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                           N->getOperand(0), RV);
      }
    // A / fp_extend(sqrt(X)): same, re-applying the FP_EXTEND to the helper's
    // result so that it has the type of the division.
    } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
      SDValue RV =
        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
                                 DCI);
      if (RV.getNode()) {
        DCI.AddToWorklist(RV.getNode());
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
                         N->getValueType(0), RV);
        DCI.AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                           N->getOperand(0), RV);
      }
    // A / fp_round(sqrt(X)): same, re-applying the FP_ROUND (with its original
    // second operand) to the helper's result.
    } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
      SDValue RV =
        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
                                 DCI);
      if (RV.getNode()) {
        DCI.AddToWorklist(RV.getNode());
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
                         N->getValueType(0), RV,
                         N->getOperand(1).getOperand(1));
        DCI.AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                           N->getOperand(0), RV);
      }
    }

    // General case (also reached when a helper above returned nothing):
    // multiply by the value DAGCombineFastRecip produces for the divisor.
    SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
    if (RV.getNode()) {
      DCI.AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                         N->getOperand(0), RV);
    }

    }
    break;
  case ISD::FSQRT: {
    assert(TM.Options.UnsafeFPMath &&
           "Reciprocal estimates require UnsafeFPMath");

    // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
    // reciprocal sqrt.
    SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
    if (RV.getNode()) {
      DCI.AddToWorklist(RV.getNode());
      RV = DAGCombineFastRecip(RV, DCI);
      if (RV.getNode()) {
        // Unfortunately, RV is now NaN if the input was exactly 0. Select out
        // this case and force the answer to 0.

        EVT VT = RV.getValueType();

        // Build a zero of the result type; vectors are splatted by hand and
        // only 4-element vectors are expected here.
        SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
        if (VT.isVector()) {
          assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
          Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
        }

        SDValue ZeroCmp =
          DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
                       N->getOperand(0), Zero, ISD::SETEQ);
        DCI.AddToWorklist(ZeroCmp.getNode());
        DCI.AddToWorklist(RV.getNode());

        // (X == 0) ? 0 : RV
        RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
                         ZeroCmp, Zero, RV);
        return RV;
      }
    }

    }
    break;
  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64 &&
            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
          SDValue Val = N->getOperand(0).getOperand(0);
          // The conversion nodes below are built on f64, so widen f32 inputs.
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
            DCI.AddToWorklist(Val.getNode());
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          // Round back down when the original node produced f32.
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
                              DAG.getIntPtrConstant(0));
            DCI.AddToWorklist(Val.getNode());
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
          // (Not implemented: this branch is intentionally empty.)
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      // Operands: chain, converted value, address, and the type of the
      // integer value that was originally being stored.
      SDValue Ops[] = {
        N->getOperand(0), Val, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };

      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops,
              cast<StoreSDNode>(N)->getMemoryVT(),
              cast<StoreSDNode>(N)->getMemOperand());
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    // The i64 form is additionally gated on hasLDBRX() and isPPC64().
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }
    break;
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);
    Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
    if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
        TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
        (VT == MVT::v16i8 || VT == MVT::v8i16 ||
         VT == MVT::v4i32 || VT == MVT::v4f32) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations.  The results of these permutations are the requested
      // loaded values.  The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.

      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr = (isLittleEndian ?
                            Intrinsic::ppc_altivec_lvsr :
                            Intrinsic::ppc_altivec_lvsl);
      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);

      // Refine the alignment of the original load (a "new" load created here
      // which was identical to the first except for the alignment would be
      // merged with the existing node regardless).
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(LD->getPointerInfo(),
                                LD->getMemOperand()->getFlags(),
                                LD->getMemoryVT().getStoreSize(),
                                ABIAlignment);
      LD->refineAlignment(MMO);
      SDValue BaseLoad = SDValue(LD, 0);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      SDValue ExtraLoad =
        DAG.getLoad(VT, dl, Chain, Ptr,
                    LD->getPointerInfo().getWithOffset(IncOffset),
                    LD->isVolatile(), LD->isNonTemporal(),
                    LD->isInvariant(), ABIAlignment);

      // Join the output chains of both loads; users of the original load's
      // chain are redirected to this token factor below.
      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                               BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // The vperm intrinsic is built on v4i32 operands; bitcast as needed.
      if (BaseLoad.getValueType() != MVT::v4i32)
        BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);

      if (ExtraLoad.getValueType() != MVT::v4i32)
        ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code.  We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != MVT::v4i32)
        Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);

      // Now we need to be really careful about how we update the users of the
      // original load. We cannot just call DCI.CombineTo (or
      // DAG.ReplaceAllUsesWith for that matter), because the load still has
      // uses created here (the permutation for example) that need to stay.
      SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
      while (UI != UE) {
        SDUse &Use = UI.getUse();
        SDNode *User = *UI;
        // Note: BaseLoad is checked here because it might not be N, but a
        // bitcast of N.
        if (User == Perm.getNode() || User == BaseLoad.getNode() ||
            User == TF.getNode() || Use.getResNo() > 1) {
          ++UI;
          continue;
        }

        // Result 0 (the loaded value) is replaced by the permutation; result 1
        // (the chain) is replaced by the token factor. The iterator is
        // advanced before the user's operands are rewritten.
        SDValue To = Use.getResNo() ? TF : Perm;
        ++UI;

        SmallVector<SDValue, 8> Ops;
        for (const SDUse &O : User->ops()) {
          if (O == Use)
            Ops.push_back(To);
          else
            Ops.push_back(O);
        }

        DAG.UpdateNodeOperands(User, Ops);
      }

      return SDValue(N, 0);
    }
    }
    break;
  case ISD::INTRINSIC_WO_CHAIN: {
    // Only the permute-control intrinsic matching the current endianness
    // (lvsr for little endian, lvsl otherwise) is considered here.
    bool isLittleEndian = Subtarget.isLittleEndian();
    Intrinsic::ID Intr = (isLittleEndian ?
                          Intrinsic::ppc_altivec_lvsr :
                          Intrinsic::ppc_altivec_lvsl);
    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      // Require the low four bits of the added amount to be known zero, i.e.
      // the address differs from the base by a multiple of 16 bytes.
      if (DAG.MaskedValueIsZero(Add->getOperand(1),
            APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
              Add.getValueType().getScalarType().getSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
                Intr) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.

            return SDValue(*UI, 0);
          }
        }
      }
    }
    }

    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    // The i64 form is additionally gated on hasLDBRX() and isPPC64().
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      // The node produces i64 only for an i64 bswap; i16 and i32 both use an
      // i32 result.
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    // This is only attempted when none of the three operands has a single use.
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if its flag result (value #1) has no
      // uses at all, don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      // The loop has no end-of-list exit other than the assert; it relies on
      // the check above guaranteeing that a user of value #1 exists.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    // A conditional branch directly on the counter-decrement intrinsic
    // becomes a BDNZ node.
    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
  }
  break;
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
    // value. If so, pass-through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
          isZero())
      LHS = LHS.getOperand(0);

    // Comparison of the counter-decrement intrinsic against a constant:
    // branch with BDNZ or BDZ depending on the condition code and constant.
    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      // "== nonzero" and "!= 0" both mean "branch if the counter is nonzero".
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    // Both are filled in by getAltivecCompareInfo below.
    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      // True for "== 1" and "!= 0"; false for "== 0" and "!= 1".
      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      // The compare's glue result (value #1) ties it to the branch.
      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// computeKnownBitsForTargetNode - Report bits of \p Op that are known to be
/// zero or one. Both masks are first reset to "nothing known" at the bit width
/// of the incoming KnownZero.
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      APInt &KnownZero,
                                                      APInt &KnownOne,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
8820 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 8821 KnownZero = 0xFFFF0000; 8822 break; 8823 } 8824 case ISD::INTRINSIC_WO_CHAIN: { 8825 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 8826 default: break; 8827 case Intrinsic::ppc_altivec_vcmpbfp_p: 8828 case Intrinsic::ppc_altivec_vcmpeqfp_p: 8829 case Intrinsic::ppc_altivec_vcmpequb_p: 8830 case Intrinsic::ppc_altivec_vcmpequh_p: 8831 case Intrinsic::ppc_altivec_vcmpequw_p: 8832 case Intrinsic::ppc_altivec_vcmpgefp_p: 8833 case Intrinsic::ppc_altivec_vcmpgtfp_p: 8834 case Intrinsic::ppc_altivec_vcmpgtsb_p: 8835 case Intrinsic::ppc_altivec_vcmpgtsh_p: 8836 case Intrinsic::ppc_altivec_vcmpgtsw_p: 8837 case Intrinsic::ppc_altivec_vcmpgtub_p: 8838 case Intrinsic::ppc_altivec_vcmpgtuh_p: 8839 case Intrinsic::ppc_altivec_vcmpgtuw_p: 8840 KnownZero = ~1U; // All bits but the low one are known to be zero. 8841 break; 8842 } 8843 } 8844 } 8845 } 8846 8847 8848 /// getConstraintType - Given a constraint, return the type of 8849 /// constraint it is for this target. 8850 PPCTargetLowering::ConstraintType 8851 PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 8852 if (Constraint.size() == 1) { 8853 switch (Constraint[0]) { 8854 default: break; 8855 case 'b': 8856 case 'r': 8857 case 'f': 8858 case 'v': 8859 case 'y': 8860 return C_RegisterClass; 8861 case 'Z': 8862 // FIXME: While Z does indicate a memory constraint, it specifically 8863 // indicates an r+r address (used in conjunction with the 'y' modifier 8864 // in the replacement string). Currently, we're forcing the base 8865 // register to be r0 in the asm printer (which is interpreted as zero) 8866 // and forming the complete address in the second register. This is 8867 // suboptimal. 8868 return C_Memory; 8869 } 8870 } else if (Constraint == "wc") { // individual CR bits. 
8871 return C_RegisterClass; 8872 } else if (Constraint == "wa" || Constraint == "wd" || 8873 Constraint == "wf" || Constraint == "ws") { 8874 return C_RegisterClass; // VSX registers. 8875 } 8876 return TargetLowering::getConstraintType(Constraint); 8877 } 8878 8879 /// Examine constraint type and operand type and determine a weight value. 8880 /// This object must already have been set up with the operand type 8881 /// and the current alternative constraint selected. 8882 TargetLowering::ConstraintWeight 8883 PPCTargetLowering::getSingleConstraintMatchWeight( 8884 AsmOperandInfo &info, const char *constraint) const { 8885 ConstraintWeight weight = CW_Invalid; 8886 Value *CallOperandVal = info.CallOperandVal; 8887 // If we don't have a value, we can't do a match, 8888 // but allow it at the lowest weight. 8889 if (!CallOperandVal) 8890 return CW_Default; 8891 Type *type = CallOperandVal->getType(); 8892 8893 // Look at the constraint type. 8894 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) 8895 return CW_Register; // an individual CR bit. 
8896 else if ((StringRef(constraint) == "wa" || 8897 StringRef(constraint) == "wd" || 8898 StringRef(constraint) == "wf") && 8899 type->isVectorTy()) 8900 return CW_Register; 8901 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) 8902 return CW_Register; 8903 8904 switch (*constraint) { 8905 default: 8906 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 8907 break; 8908 case 'b': 8909 if (type->isIntegerTy()) 8910 weight = CW_Register; 8911 break; 8912 case 'f': 8913 if (type->isFloatTy()) 8914 weight = CW_Register; 8915 break; 8916 case 'd': 8917 if (type->isDoubleTy()) 8918 weight = CW_Register; 8919 break; 8920 case 'v': 8921 if (type->isVectorTy()) 8922 weight = CW_Register; 8923 break; 8924 case 'y': 8925 weight = CW_Register; 8926 break; 8927 case 'Z': 8928 weight = CW_Memory; 8929 break; 8930 } 8931 return weight; 8932 } 8933 8934 std::pair<unsigned, const TargetRegisterClass*> 8935 PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 8936 MVT VT) const { 8937 if (Constraint.size() == 1) { 8938 // GCC RS6000 Constraint Letters 8939 switch (Constraint[0]) { 8940 case 'b': // R1-R31 8941 if (VT == MVT::i64 && Subtarget.isPPC64()) 8942 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 8943 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 8944 case 'r': // R0-R31 8945 if (VT == MVT::i64 && Subtarget.isPPC64()) 8946 return std::make_pair(0U, &PPC::G8RCRegClass); 8947 return std::make_pair(0U, &PPC::GPRCRegClass); 8948 case 'f': 8949 if (VT == MVT::f32 || VT == MVT::i32) 8950 return std::make_pair(0U, &PPC::F4RCRegClass); 8951 if (VT == MVT::f64 || VT == MVT::i64) 8952 return std::make_pair(0U, &PPC::F8RCRegClass); 8953 break; 8954 case 'v': 8955 return std::make_pair(0U, &PPC::VRRCRegClass); 8956 case 'y': // crrc 8957 return std::make_pair(0U, &PPC::CRRCRegClass); 8958 } 8959 } else if (Constraint == "wc") { // an individual CR bit. 
8960 return std::make_pair(0U, &PPC::CRBITRCRegClass); 8961 } else if (Constraint == "wa" || Constraint == "wd" || 8962 Constraint == "wf") { 8963 return std::make_pair(0U, &PPC::VSRCRegClass); 8964 } else if (Constraint == "ws") { 8965 return std::make_pair(0U, &PPC::VSFRCRegClass); 8966 } 8967 8968 std::pair<unsigned, const TargetRegisterClass*> R = 8969 TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 8970 8971 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 8972 // (which we call X[0-9]+). If a 64-bit value has been requested, and a 8973 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 8974 // register. 8975 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 8976 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 8977 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && 8978 PPC::GPRCRegClass.contains(R.first)) { 8979 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 8980 return std::make_pair(TRI->getMatchingSuperReg(R.first, 8981 PPC::sub_32, &PPC::G8RCRegClass), 8982 &PPC::G8RCRegClass); 8983 } 8984 8985 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. 8986 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { 8987 R.first = PPC::CR0; 8988 R.second = &PPC::CRRCRegClass; 8989 } 8990 8991 return R; 8992 } 8993 8994 8995 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 8996 /// vector. If it is invalid, don't add anything to Ops. 8997 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 8998 std::string &Constraint, 8999 std::vector<SDValue>&Ops, 9000 SelectionDAG &DAG) const { 9001 SDValue Result; 9002 9003 // Only support length 1 constraints. 
9004 if (Constraint.length() > 1) return; 9005 9006 char Letter = Constraint[0]; 9007 switch (Letter) { 9008 default: break; 9009 case 'I': 9010 case 'J': 9011 case 'K': 9012 case 'L': 9013 case 'M': 9014 case 'N': 9015 case 'O': 9016 case 'P': { 9017 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 9018 if (!CST) return; // Must be an immediate to match. 9019 int64_t Value = CST->getSExtValue(); 9020 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative 9021 // numbers are printed as such. 9022 switch (Letter) { 9023 default: llvm_unreachable("Unknown constraint letter!"); 9024 case 'I': // "I" is a signed 16-bit constant. 9025 if (isInt<16>(Value)) 9026 Result = DAG.getTargetConstant(Value, TCVT); 9027 break; 9028 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 9029 if (isShiftedUInt<16, 16>(Value)) 9030 Result = DAG.getTargetConstant(Value, TCVT); 9031 break; 9032 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 9033 if (isShiftedInt<16, 16>(Value)) 9034 Result = DAG.getTargetConstant(Value, TCVT); 9035 break; 9036 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 9037 if (isUInt<16>(Value)) 9038 Result = DAG.getTargetConstant(Value, TCVT); 9039 break; 9040 case 'M': // "M" is a constant that is greater than 31. 9041 if (Value > 31) 9042 Result = DAG.getTargetConstant(Value, TCVT); 9043 break; 9044 case 'N': // "N" is a positive constant that is an exact power of two. 9045 if (Value > 0 && isPowerOf2_64(Value)) 9046 Result = DAG.getTargetConstant(Value, TCVT); 9047 break; 9048 case 'O': // "O" is the constant zero. 9049 if (Value == 0) 9050 Result = DAG.getTargetConstant(Value, TCVT); 9051 break; 9052 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 
9053 if (isInt<16>(-Value)) 9054 Result = DAG.getTargetConstant(Value, TCVT); 9055 break; 9056 } 9057 break; 9058 } 9059 } 9060 9061 if (Result.getNode()) { 9062 Ops.push_back(Result); 9063 return; 9064 } 9065 9066 // Handle standard constraint letters. 9067 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 9068 } 9069 9070 // isLegalAddressingMode - Return true if the addressing mode represented 9071 // by AM is legal for this target, for a load/store of the specified type. 9072 bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, 9073 Type *Ty) const { 9074 // FIXME: PPC does not allow r+i addressing modes for vectors! 9075 9076 // PPC allows a sign-extended 16-bit immediate field. 9077 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 9078 return false; 9079 9080 // No global is ever allowed as a base. 9081 if (AM.BaseGV) 9082 return false; 9083 9084 // PPC only support r+r, 9085 switch (AM.Scale) { 9086 case 0: // "r+i" or just "i", depending on HasBaseReg. 9087 break; 9088 case 1: 9089 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 9090 return false; 9091 // Otherwise we have r+r or r+i. 9092 break; 9093 case 2: 9094 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 9095 return false; 9096 // Allow 2*r as r+r. 9097 break; 9098 default: 9099 // No other scales are supported. 9100 return false; 9101 } 9102 9103 return true; 9104 } 9105 9106 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 9107 SelectionDAG &DAG) const { 9108 MachineFunction &MF = DAG.getMachineFunction(); 9109 MachineFrameInfo *MFI = MF.getFrameInfo(); 9110 MFI->setReturnAddressIsTaken(true); 9111 9112 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 9113 return SDValue(); 9114 9115 SDLoc dl(Op); 9116 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9117 9118 // Make sure the function does not optimize away the store of the RA to 9119 // the stack. 
9120 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 9121 FuncInfo->setLRStoreRequired(); 9122 bool isPPC64 = Subtarget.isPPC64(); 9123 bool isDarwinABI = Subtarget.isDarwinABI(); 9124 9125 if (Depth > 0) { 9126 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 9127 SDValue Offset = 9128 9129 DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI), 9130 isPPC64? MVT::i64 : MVT::i32); 9131 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 9132 DAG.getNode(ISD::ADD, dl, getPointerTy(), 9133 FrameAddr, Offset), 9134 MachinePointerInfo(), false, false, false, 0); 9135 } 9136 9137 // Just load the return address off the stack. 9138 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 9139 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 9140 RetAddrFI, MachinePointerInfo(), false, false, false, 0); 9141 } 9142 9143 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 9144 SelectionDAG &DAG) const { 9145 SDLoc dl(Op); 9146 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9147 9148 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 9149 bool isPPC64 = PtrVT == MVT::i64; 9150 9151 MachineFunction &MF = DAG.getMachineFunction(); 9152 MachineFrameInfo *MFI = MF.getFrameInfo(); 9153 MFI->setFrameAddressIsTaken(true); 9154 9155 // Naked functions never have a frame pointer, and so we use r1. For all 9156 // other functions, this decision must be delayed until during PEI. 9157 unsigned FrameReg; 9158 if (MF.getFunction()->getAttributes().hasAttribute( 9159 AttributeSet::FunctionIndex, Attribute::Naked)) 9160 FrameReg = isPPC64 ? PPC::X1 : PPC::R1; 9161 else 9162 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; 9163 9164 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 9165 PtrVT); 9166 while (Depth--) 9167 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 9168 FrameAddr, MachinePointerInfo(), false, false, 9169 false, 0); 9170 return FrameAddr; 9171 } 9172 9173 // FIXME? 
Maybe this could be a TableGen attribute on some registers and 9174 // this table could be generated automatically from RegInfo. 9175 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, 9176 EVT VT) const { 9177 bool isPPC64 = Subtarget.isPPC64(); 9178 bool isDarwinABI = Subtarget.isDarwinABI(); 9179 9180 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || 9181 (!isPPC64 && VT != MVT::i32)) 9182 report_fatal_error("Invalid register global variable type"); 9183 9184 bool is64Bit = isPPC64 && VT == MVT::i64; 9185 unsigned Reg = StringSwitch<unsigned>(RegName) 9186 .Case("r1", is64Bit ? PPC::X1 : PPC::R1) 9187 .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2)) 9188 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : 9189 (is64Bit ? PPC::X13 : PPC::R13)) 9190 .Default(0); 9191 9192 if (Reg) 9193 return Reg; 9194 report_fatal_error("Invalid register name global variable"); 9195 } 9196 9197 bool 9198 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 9199 // The PowerPC target isn't yet aware of offsets. 9200 return false; 9201 } 9202 9203 /// getOptimalMemOpType - Returns the target specific optimal type for load 9204 /// and store operations as a result of memset, memcpy, and memmove 9205 /// lowering. If DstAlign is zero that means it's safe to destination 9206 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 9207 /// means there isn't a need to check it against alignment requirement, 9208 /// probably because the source does not need to be loaded. If 'IsMemset' is 9209 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 9210 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 9211 /// source is constant so it does not need to be loaded. 9212 /// It returns EVT::Other if the type should be determined using generic 9213 /// target-independent logic. 
9214 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, 9215 unsigned DstAlign, unsigned SrcAlign, 9216 bool IsMemset, bool ZeroMemset, 9217 bool MemcpyStrSrc, 9218 MachineFunction &MF) const { 9219 if (Subtarget.isPPC64()) { 9220 return MVT::i64; 9221 } else { 9222 return MVT::i32; 9223 } 9224 } 9225 9226 /// \brief Returns true if it is beneficial to convert a load of a constant 9227 /// to just the constant itself. 9228 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 9229 Type *Ty) const { 9230 assert(Ty->isIntegerTy()); 9231 9232 unsigned BitSize = Ty->getPrimitiveSizeInBits(); 9233 if (BitSize == 0 || BitSize > 64) 9234 return false; 9235 return true; 9236 } 9237 9238 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { 9239 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 9240 return false; 9241 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 9242 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 9243 return NumBits1 == 64 && NumBits2 == 32; 9244 } 9245 9246 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { 9247 if (!VT1.isInteger() || !VT2.isInteger()) 9248 return false; 9249 unsigned NumBits1 = VT1.getSizeInBits(); 9250 unsigned NumBits2 = VT2.getSizeInBits(); 9251 return NumBits1 == 64 && NumBits2 == 32; 9252 } 9253 9254 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 9255 return isInt<16>(Imm) || isUInt<16>(Imm); 9256 } 9257 9258 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { 9259 return isInt<16>(Imm) || isUInt<16>(Imm); 9260 } 9261 9262 bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, 9263 unsigned, 9264 bool *Fast) const { 9265 if (DisablePPCUnaligned) 9266 return false; 9267 9268 // PowerPC supports unaligned memory access for simple non-vector types. 
9269 // Although accessing unaligned addresses is not as efficient as accessing 9270 // aligned addresses, it is generally more efficient than manual expansion, 9271 // and generally only traps for software emulation when crossing page 9272 // boundaries. 9273 9274 if (!VT.isSimple()) 9275 return false; 9276 9277 if (VT.getSimpleVT().isVector()) { 9278 if (Subtarget.hasVSX()) { 9279 if (VT != MVT::v2f64 && VT != MVT::v2i64) 9280 return false; 9281 } else { 9282 return false; 9283 } 9284 } 9285 9286 if (VT == MVT::ppcf128) 9287 return false; 9288 9289 if (Fast) 9290 *Fast = true; 9291 9292 return true; 9293 } 9294 9295 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 9296 VT = VT.getScalarType(); 9297 9298 if (!VT.isSimple()) 9299 return false; 9300 9301 switch (VT.getSimpleVT().SimpleTy) { 9302 case MVT::f32: 9303 case MVT::f64: 9304 return true; 9305 default: 9306 break; 9307 } 9308 9309 return false; 9310 } 9311 9312 bool 9313 PPCTargetLowering::shouldExpandBuildVectorWithShuffles( 9314 EVT VT , unsigned DefinedValues) const { 9315 if (VT == MVT::v2i64) 9316 return false; 9317 9318 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); 9319 } 9320 9321 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 9322 if (DisableILPPref || Subtarget.enableMachineScheduler()) 9323 return TargetLowering::getSchedulingPreference(N); 9324 9325 return Sched::ILP; 9326 } 9327 9328 // Create a fast isel object. 9329 FastISel * 9330 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, 9331 const TargetLibraryInfo *LibInfo) const { 9332 return PPC::createFastISel(FuncInfo, LibInfo); 9333 } 9334