//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                     CCValAssign::LocInfo &LocInfo,
                                     ISD::ArgFlagsTy &ArgFlags,
                                     CCState &State);
static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                            MVT &LocVT,
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State);
static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                              MVT &LocVT,
                                              CCValAssign::LocInfo &LocInfo,
                                              ISD::ArgFlagsTy &ArgFlags,
                                              CCState &State);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();

  return new TargetLoweringObjectFileELF();
}

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
  const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget->isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Without a hardware square-root instruction, expand FSQRT to a libcall.
  if (!Subtarget->hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  // PowerPC does not have SELECT.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget->isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
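      // Promoting i1/i8/i16/i32 to i64 here means every integer vararg read
      // consumes exactly one 8-byte slot of the save area.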
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget->has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (Subtarget->use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget->hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE , VT, Promote);
      AddPromotedToType (ISD::STORE , VT, MVT::v4i32);

      // No other operations are legal.
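      // Marking everything else Expand lets the generic legalizer unroll or
      // scalarize these operations instead of assuming an Altivec instruction
      // exists for them.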
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
           j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
        MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
        setTruncStoreAction(VT, InnerVT, Expand);
      }
      setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
  }

  if (Subtarget->has64BitSupport()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
  }

  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget->isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  setMinFunctionAlignment(2);
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);

  if (isPPC64 && Subtarget->isJITCodeModel())
    // Temporary workaround for the inability of PPC64 JIT to handle jump
    // tables.
    setSupportJumpTables(false);

  setInsertFencesForAtomic(true);

  setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends.  GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
    maxStoresPerMemset = 32;
    maxStoresPerMemsetOptSize = 16;
    maxStoresPerMemcpy = 32;
    maxStoresPerMemcpyOptSize = 8;
    maxStoresPerMemmove = 32;
    maxStoresPerMemmoveOptSize = 8;

    setPrefFunctionAlignment(4);
    benefitFromCodePlacementOpt = true;
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  const TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on a 4-byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
    if (VTy->getBitWidth() >= 128)
      return 16;

  // Everything else is aligned to 8 bytes on PPC64 and 4 bytes on PPC32.
  if (PPCSubTarget.isPPC64())
    return 8;

  return 4;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::TOC_RESTORE:     return "PPCISD::TOC_RESTORE";
  case PPCISD::LOAD:            return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:          return "PPCISD::STD_32";
  case PPCISD::CALL_SVR4:       return "PPCISD::CALL_SVR4";
  case PPCISD::CALL_NOP_SVR4:   return "PPCISD::CALL_NOP_SVR4";
  case PPCISD::CALL_Darwin:     return "PPCISD::CALL_Darwin";
  case PPCISD::NOP:             return "PPCISD::NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL_Darwin:    return "PPCISD::BCTRL_Darwin";
  case PPCISD::BCTRL_SVR4:      return "PPCISD::BCTRL_SVR4";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:            return "PPCISD::MFCR";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
  case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
  case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
  }
}

EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  }
  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
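  // EltSize below is the width in bytes of one build_vector operand within the
  // 128-bit vector; when it is narrower than ByteSize, several adjacent
  // operands must agree to form one splatted element.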
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
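  // For example, with ByteSize == 1 and a 4-byte element value of 0x01010101,
  // the loop halves the width twice (0x0101 == 0x0101, then 0x01 == 0x01), so
  // the value can be splatted as the byte 0x01.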
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
        (Value                         & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}


/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG) const {
  // FIXME dl should come from parent load or store, not from address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0".
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
                         N.getValueType());
  Index = N;
  return true;
}

/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].  Suitable for use by STD and friends.
bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
                                                 SDValue &Base,
                                                 SelectionDAG &DAG) const {
  // FIXME dl should come from the parent load or store, not the address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.  Verify low two bits are clear.
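    // The displacement field is encoded scaled by four (note the >> 2 below),
    // so only offsets with the low two bits clear can be represented.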
    if ((CN->getZExtValue() & 3) == 0) {
      // If this address fits entirely in a 14-bit sext immediate field, codegen
      // this as "d, 0".
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
                               CN->getValueType(0));
        return true;
      }

      // Fold the low-part of 32-bit absolute addresses into addr mode.
      if (CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
        int Addr = (int)CN->getZExtValue();

        // Otherwise, break this down into an LIS + disp.
        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
        return true;
      }
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}


/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();

  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU use reg+imm*4, others use reg+imm.
  if (VT != MVT::i64) {
    // reg + imm
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
      return false;
  } else {
    // reg + imm * 4.
    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
                               unsigned &LoOpFlags, const GlobalValue *GV = 0) {
  HiOpFlags = PPCII::MO_HA16;
  LoOpFlags = PPCII::MO_LO16;

  // Don't use the pic base if not in PIC relocation model.  Or if we are on a
  // non-darwin platform.  We don't support PIC on other platforms yet.
  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
               TM.getSubtarget<PPCSubtarget>().isDarwin();
  if (isPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr, make
  // sure that instruction lowering adds it.
  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }

  return isPIC;
}

static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  DebugLoc DL = HiPart.getDebugLoc();

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue CPIHi =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();

  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {

  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  DebugLoc dl = GA->getDebugLoc();
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy();
  bool is64bit = PPCSubTarget.isPPC64();

  TLSModel::Model model = getTargetMachine().getTLSModel(GV);

  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                             PPCII::MO_TPREL16_HA);
  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                             PPCII::MO_TPREL16_LO);

  if (model != TLSModel::LocalExec)
    llvm_unreachable("only local-exec TLS mode supported");
  SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                   is64bit ? MVT::i64 : MVT::i32);
  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}

SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  DebugLoc DL = GSDN->getDebugLoc();
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);

  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do an
  // extra load to get the address of the global.
1369 if (MOHiFlag & PPCII::MO_NLP_FLAG)
1370 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
1371 false, false, false, 0);
1372 return Ptr;
1373 }
1374
1375 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1376 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1377 DebugLoc dl = Op.getDebugLoc();
1378
1379 // If we're comparing for equality to zero, expose the fact that this is
1380 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
1381 // fold the new nodes.
1382 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1383 if (C->isNullValue() && CC == ISD::SETEQ) {
1384 EVT VT = Op.getOperand(0).getValueType();
1385 SDValue Zext = Op.getOperand(0);
1386 if (VT.bitsLT(MVT::i32)) {
1387 VT = MVT::i32;
1388 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
1389 }
1390 unsigned Log2b = Log2_32(VT.getSizeInBits());
1391 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
1392 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
1393 DAG.getConstant(Log2b, MVT::i32));
1394 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
1395 }
1396 // Leave comparisons against 0 and -1 alone for now, since they're usually
1397 // optimized. FIXME: revisit this when we can custom lower all setcc
1398 // optimizations.
1399 if (C->isAllOnesValue() || C->isNullValue())
1400 return SDValue();
1401 }
1402
1403 // If we have an integer seteq/setne, turn it into a compare against zero
1404 // by xor'ing the rhs with the lhs, which is faster than setting a
1405 // condition register, reading it back out, and masking the correct bit. The
1406 // normal approach here uses sub to do this instead of xor. Using xor exposes
1407 // the result to other bit-twiddling opportunities.
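// For example, (seteq i32 %a, %b) becomes (seteq i32 (xor %a, %b), 0), and an
// i32 equality test against zero is then emitted as (srl (ctlz %x), 5), which
// yields 1 exactly when %x is zero.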
1408 EVT LHSVT = Op.getOperand(0).getValueType(); 1409 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 1410 EVT VT = Op.getValueType(); 1411 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 1412 Op.getOperand(1)); 1413 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); 1414 } 1415 return SDValue(); 1416 } 1417 1418 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, 1419 const PPCSubtarget &Subtarget) const { 1420 SDNode *Node = Op.getNode(); 1421 EVT VT = Node->getValueType(0); 1422 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1423 SDValue InChain = Node->getOperand(0); 1424 SDValue VAListPtr = Node->getOperand(1); 1425 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 1426 DebugLoc dl = Node->getDebugLoc(); 1427 1428 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 1429 1430 // gpr_index 1431 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1432 VAListPtr, MachinePointerInfo(SV), MVT::i8, 1433 false, false, 0); 1434 InChain = GprIndex.getValue(1); 1435 1436 if (VT == MVT::i64) { 1437 // Check if GprIndex is even 1438 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 1439 DAG.getConstant(1, MVT::i32)); 1440 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 1441 DAG.getConstant(0, MVT::i32), ISD::SETNE); 1442 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 1443 DAG.getConstant(1, MVT::i32)); 1444 // Align GprIndex to be even if it isn't 1445 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 1446 GprIndex); 1447 } 1448 1449 // fpr index is 1 byte after gpr 1450 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1451 DAG.getConstant(1, MVT::i32)); 1452 1453 // fpr 1454 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1455 FprPtr, MachinePointerInfo(SV), MVT::i8, 1456 false, false, 0); 1457 InChain = FprIndex.getValue(1); 1458 1459 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1460 DAG.getConstant(8, MVT::i32)); 1461 1462 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1463 DAG.getConstant(4, MVT::i32)); 1464 1465 // areas 1466 SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, 1467 MachinePointerInfo(), false, false, 1468 false, 0); 1469 InChain = OverflowArea.getValue(1); 1470 1471 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, 1472 MachinePointerInfo(), false, false, 1473 false, 0); 1474 InChain = RegSaveArea.getValue(1); 1475 1476 // select overflow_area if index > 8 1477 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 1478 DAG.getConstant(8, MVT::i32), ISD::SETLT); 1479 1480 // adjustment constant gpr_index * 4/8 1481 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 1482 VT.isInteger() ? GprIndex : FprIndex, 1483 DAG.getConstant(VT.isInteger() ? 4 : 8, 1484 MVT::i32)); 1485 1486 // OurReg = RegSaveArea + RegConstant 1487 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 1488 RegConstant); 1489 1490 // Floating types are 32 bytes into RegSaveArea 1491 if (VT.isFloatingPoint()) 1492 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 1493 DAG.getConstant(32, MVT::i32)); 1494 1495 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 1496 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 1497 VT.isInteger() ? GprIndex : FprIndex, 1498 DAG.getConstant(VT == MVT::i64 ? 
2 : 1, 1499 MVT::i32)); 1500 1501 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 1502 VT.isInteger() ? VAListPtr : FprPtr, 1503 MachinePointerInfo(SV), 1504 MVT::i8, false, false, 0); 1505 1506 // determine if we should load from reg_save_area or overflow_area 1507 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 1508 1509 // increase overflow_area by 4/8 if gpr/fpr > 8 1510 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 1511 DAG.getConstant(VT.isInteger() ? 4 : 8, 1512 MVT::i32)); 1513 1514 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 1515 OverflowAreaPlusN); 1516 1517 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, 1518 OverflowAreaPtr, 1519 MachinePointerInfo(), 1520 MVT::i32, false, false, 0); 1521 1522 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 1523 false, false, false, 0); 1524 } 1525 1526 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 1527 SelectionDAG &DAG) const { 1528 return Op.getOperand(0); 1529 } 1530 1531 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 1532 SelectionDAG &DAG) const { 1533 SDValue Chain = Op.getOperand(0); 1534 SDValue Trmp = Op.getOperand(1); // trampoline 1535 SDValue FPtr = Op.getOperand(2); // nested function 1536 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 1537 DebugLoc dl = Op.getDebugLoc(); 1538 1539 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1540 bool isPPC64 = (PtrVT == MVT::i64); 1541 Type *IntPtrTy = 1542 DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( 1543 *DAG.getContext()); 1544 1545 TargetLowering::ArgListTy Args; 1546 TargetLowering::ArgListEntry Entry; 1547 1548 Entry.Ty = IntPtrTy; 1549 Entry.Node = Trmp; Args.push_back(Entry); 1550 1551 // TrampSize == (isPPC64 ? 48 : 40); 1552 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, 1553 isPPC64 ? MVT::i64 : MVT::i32); 1554 Args.push_back(Entry); 1555 1556 Entry.Node = FPtr; Args.push_back(Entry); 1557 Entry.Node = Nest; Args.push_back(Entry); 1558 1559 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 1560 TargetLowering::CallLoweringInfo CLI(Chain, 1561 Type::getVoidTy(*DAG.getContext()), 1562 false, false, false, false, 0, 1563 CallingConv::C, 1564 /*isTailCall=*/false, 1565 /*doesNotRet=*/false, 1566 /*isReturnValueUsed=*/true, 1567 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 1568 Args, DAG, dl); 1569 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 1570 1571 return CallResult.second; 1572 } 1573 1574 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, 1575 const PPCSubtarget &Subtarget) const { 1576 MachineFunction &MF = DAG.getMachineFunction(); 1577 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1578 1579 DebugLoc dl = Op.getDebugLoc(); 1580 1581 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 1582 // vastart just stores the address of the VarArgsFrameIndex slot into the 1583 // memory location argument. 1584 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1585 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1586 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1587 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 1588 MachinePointerInfo(SV), 1589 false, false, 0); 1590 } 1591 1592 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 1593 // We suppose the given va_list is already allocated. 
1594 // 1595 // typedef struct { 1596 // char gpr; /* index into the array of 8 GPRs 1597 // * stored in the register save area 1598 // * gpr=0 corresponds to r3, 1599 // * gpr=1 to r4, etc. 1600 // */ 1601 // char fpr; /* index into the array of 8 FPRs 1602 // * stored in the register save area 1603 // * fpr=0 corresponds to f1, 1604 // * fpr=1 to f2, etc. 1605 // */ 1606 // char *overflow_arg_area; 1607 // /* location on stack that holds 1608 // * the next overflow argument 1609 // */ 1610 // char *reg_save_area; 1611 // /* where r3:r10 and f1:f8 (if saved) 1612 // * are stored 1613 // */ 1614 // } va_list[1]; 1615 1616 1617 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); 1618 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); 1619 1620 1621 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1622 1623 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 1624 PtrVT); 1625 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1626 PtrVT); 1627 1628 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 1629 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); 1630 1631 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 1632 SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); 1633 1634 uint64_t FPROffset = 1; 1635 SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); 1636 1637 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1638 1639 // Store first byte : number of int regs 1640 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, 1641 Op.getOperand(1), 1642 MachinePointerInfo(SV), 1643 MVT::i8, false, false, 0); 1644 uint64_t nextOffset = FPROffset; 1645 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 1646 ConstFPROffset); 1647 1648 // Store second byte : number of float regs 1649 SDValue secondStore = 1650 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 1651 MachinePointerInfo(SV, nextOffset), MVT::i8, 1652 false, false, 0); 1653 nextOffset += StackOffset; 1654 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 1655 1656 // Store second word : arguments given on stack 1657 SDValue thirdStore = 1658 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 1659 MachinePointerInfo(SV, nextOffset), 1660 false, false, 0); 1661 nextOffset += FrameOffset; 1662 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 1663 1664 // Store third word : arguments given in registers 1665 return DAG.getStore(thirdStore, dl, FR, nextPtr, 1666 MachinePointerInfo(SV, nextOffset), 1667 false, false, 0); 1668 1669 } 1670 1671 #include "PPCGenCallingConv.inc" 1672 1673 static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 1674 CCValAssign::LocInfo &LocInfo, 1675 ISD::ArgFlagsTy &ArgFlags, 1676 CCState &State) { 1677 return true; 1678 } 1679 1680 static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 1681 MVT &LocVT, 1682 CCValAssign::LocInfo &LocInfo, 1683 ISD::ArgFlagsTy &ArgFlags, 1684 CCState &State) { 1685 static const uint16_t ArgRegs[] = { 1686 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1687 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1688 }; 1689 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1690 1691 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1692 1693 // Skip one register if the first unallocated register has an even register 1694 // number and there are still argument registers available which have not been 1695 // allocated yet. 
RegNum is actually an index into ArgRegs, which means we 1696 // need to skip a register if RegNum is odd. 1697 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 1698 State.AllocateReg(ArgRegs[RegNum]); 1699 } 1700 1701 // Always return false here, as this function only makes sure that the first 1702 // unallocated register has an odd register number and does not actually 1703 // allocate a register for the current argument. 1704 return false; 1705 } 1706 1707 static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 1708 MVT &LocVT, 1709 CCValAssign::LocInfo &LocInfo, 1710 ISD::ArgFlagsTy &ArgFlags, 1711 CCState &State) { 1712 static const uint16_t ArgRegs[] = { 1713 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1714 PPC::F8 1715 }; 1716 1717 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1718 1719 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1720 1721 // If there is only one Floating-point register left we need to put both f64 1722 // values of a split ppc_fp128 value on the stack. 1723 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 1724 State.AllocateReg(ArgRegs[RegNum]); 1725 } 1726 1727 // Always return false here, as this function only makes sure that the two f64 1728 // values a ppc_fp128 value is split into are both passed in registers or both 1729 // passed on the stack and does not actually allocate a register for the 1730 // current argument. 1731 return false; 1732 } 1733 1734 /// GetFPR - Get the set of FP registers that should be allocated for arguments, 1735 /// on Darwin. 1736 static const uint16_t *GetFPR() { 1737 static const uint16_t FPR[] = { 1738 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1739 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 1740 }; 1741 1742 return FPR; 1743 } 1744 1745 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 1746 /// the stack. 
1747 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 1748 unsigned PtrByteSize) { 1749 unsigned ArgSize = ArgVT.getSizeInBits()/8; 1750 if (Flags.isByVal()) 1751 ArgSize = Flags.getByValSize(); 1752 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 1753 1754 return ArgSize; 1755 } 1756 1757 SDValue 1758 PPCTargetLowering::LowerFormalArguments(SDValue Chain, 1759 CallingConv::ID CallConv, bool isVarArg, 1760 const SmallVectorImpl<ISD::InputArg> 1761 &Ins, 1762 DebugLoc dl, SelectionDAG &DAG, 1763 SmallVectorImpl<SDValue> &InVals) 1764 const { 1765 if (PPCSubTarget.isSVR4ABI()) { 1766 if (PPCSubTarget.isPPC64()) 1767 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 1768 dl, DAG, InVals); 1769 else 1770 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 1771 dl, DAG, InVals); 1772 } else { 1773 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 1774 dl, DAG, InVals); 1775 } 1776 } 1777 1778 SDValue 1779 PPCTargetLowering::LowerFormalArguments_32SVR4( 1780 SDValue Chain, 1781 CallingConv::ID CallConv, bool isVarArg, 1782 const SmallVectorImpl<ISD::InputArg> 1783 &Ins, 1784 DebugLoc dl, SelectionDAG &DAG, 1785 SmallVectorImpl<SDValue> &InVals) const { 1786 1787 // 32-bit SVR4 ABI Stack Frame Layout: 1788 // +-----------------------------------+ 1789 // +--> | Back chain | 1790 // | +-----------------------------------+ 1791 // | | Floating-point register save area | 1792 // | +-----------------------------------+ 1793 // | | General register save area | 1794 // | +-----------------------------------+ 1795 // | | CR save word | 1796 // | +-----------------------------------+ 1797 // | | VRSAVE save word | 1798 // | +-----------------------------------+ 1799 // | | Alignment padding | 1800 // | +-----------------------------------+ 1801 // | | Vector register save area | 1802 // | +-----------------------------------+ 1803 // | | Local variable space | 1804 // | +-----------------------------------+ 1805 // | | Parameter list area | 1806 // | +-----------------------------------+ 1807 // | | LR save word | 1808 // | +-----------------------------------+ 1809 // SP--> +--- | Back chain | 1810 // +-----------------------------------+ 1811 // 1812 // Specifications: 1813 // System V Application Binary Interface PowerPC Processor Supplement 1814 // AltiVec Technology Programming Interface Manual 1815 1816 MachineFunction &MF = DAG.getMachineFunction(); 1817 MachineFrameInfo *MFI = MF.getFrameInfo(); 1818 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1819 1820 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1821 // Potential tail calls could cause overwriting of argument stack slots. 1822 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 1823 (CallConv == CallingConv::Fast)); 1824 unsigned PtrByteSize = 4; 1825 1826 // Assign locations to all of the incoming arguments. 1827 SmallVector<CCValAssign, 16> ArgLocs; 1828 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1829 getTargetMachine(), ArgLocs, *DAG.getContext()); 1830 1831 // Reserve space for the linkage area on the stack. 1832 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); 1833 1834 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4); 1835 1836 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1837 CCValAssign &VA = ArgLocs[i]; 1838 1839 // Arguments stored in registers. 
1840 if (VA.isRegLoc()) { 1841 const TargetRegisterClass *RC; 1842 EVT ValVT = VA.getValVT(); 1843 1844 switch (ValVT.getSimpleVT().SimpleTy) { 1845 default: 1846 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 1847 case MVT::i32: 1848 RC = &PPC::GPRCRegClass; 1849 break; 1850 case MVT::f32: 1851 RC = &PPC::F4RCRegClass; 1852 break; 1853 case MVT::f64: 1854 RC = &PPC::F8RCRegClass; 1855 break; 1856 case MVT::v16i8: 1857 case MVT::v8i16: 1858 case MVT::v4i32: 1859 case MVT::v4f32: 1860 RC = &PPC::VRRCRegClass; 1861 break; 1862 } 1863 1864 // Transform the arguments stored in physical registers into virtual ones. 1865 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 1866 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); 1867 1868 InVals.push_back(ArgValue); 1869 } else { 1870 // Argument stored in memory. 1871 assert(VA.isMemLoc()); 1872 1873 unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; 1874 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 1875 isImmutable); 1876 1877 // Create load nodes to retrieve arguments from the stack. 1878 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1879 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 1880 MachinePointerInfo(), 1881 false, false, false, 0)); 1882 } 1883 } 1884 1885 // Assign locations to all of the incoming aggregate by value arguments. 1886 // Aggregates passed by value are stored in the local variable space of the 1887 // caller's stack frame, right above the parameter list area. 1888 SmallVector<CCValAssign, 16> ByValArgLocs; 1889 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1890 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 1891 1892 // Reserve stack space for the allocations in CCInfo. 1893 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 1894 1895 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal); 1896 1897 // Area that is at least reserved in the caller of this function. 1898 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 1899 1900 // Set the size that is at least reserved in caller of this function. Tail 1901 // call optimized function's reserved stack space needs to be aligned so that 1902 // taking the difference between two stack areas will result in an aligned 1903 // stack. 1904 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1905 1906 MinReservedArea = 1907 std::max(MinReservedArea, 1908 PPCFrameLowering::getMinCallFrameSize(false, false)); 1909 1910 unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> 1911 getStackAlignment(); 1912 unsigned AlignMask = TargetAlign-1; 1913 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 1914 1915 FI->setMinReservedArea(MinReservedArea); 1916 1917 SmallVector<SDValue, 8> MemOps; 1918 1919 // If the function takes variable number of arguments, make a frame index for 1920 // the start of the first vararg value... for expansion of llvm.va_start. 
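// The register save area allocated below lays out the 8 GPRs first (32 bytes)
// followed by the 8 FPRs (64 bytes); LowerVAARG depends on the FPR block
// starting 32 bytes into this area.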
1921 if (isVarArg) { 1922 static const uint16_t GPArgRegs[] = { 1923 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1924 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1925 }; 1926 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 1927 1928 static const uint16_t FPArgRegs[] = { 1929 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1930 PPC::F8 1931 }; 1932 const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 1933 1934 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, 1935 NumGPArgRegs)); 1936 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, 1937 NumFPArgRegs)); 1938 1939 // Make room for NumGPArgRegs and NumFPArgRegs. 1940 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 1941 NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; 1942 1943 FuncInfo->setVarArgsStackOffset( 1944 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 1945 CCInfo.getNextStackOffset(), true)); 1946 1947 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 1948 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1949 1950 // The fixed integer arguments of a variadic function are stored to the 1951 // VarArgsFrameIndex on the stack so that they may be loaded by deferencing 1952 // the result of va_next. 1953 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 1954 // Get an existing live-in vreg, or add a new one. 1955 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 1956 if (!VReg) 1957 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 1958 1959 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 1960 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 1961 MachinePointerInfo(), false, false, 0); 1962 MemOps.push_back(Store); 1963 // Increment the address by four for the next argument to store 1964 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 1965 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 1966 } 1967 1968 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 1969 // is set. 1970 // The double arguments are stored to the VarArgsFrameIndex 1971 // on the stack. 1972 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 1973 // Get an existing live-in vreg, or add a new one. 1974 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 1975 if (!VReg) 1976 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 1977 1978 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 1979 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 1980 MachinePointerInfo(), false, false, 0); 1981 MemOps.push_back(Store); 1982 // Increment the address by eight for the next argument to store 1983 SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, 1984 PtrVT); 1985 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 1986 } 1987 } 1988 1989 if (!MemOps.empty()) 1990 Chain = DAG.getNode(ISD::TokenFactor, dl, 1991 MVT::Other, &MemOps[0], MemOps.size()); 1992 1993 return Chain; 1994 } 1995 1996 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 1997 // value to MVT::i64 and then truncate to the correct register size. 
1998 SDValue 1999 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, 2000 SelectionDAG &DAG, SDValue ArgVal, 2001 DebugLoc dl) const { 2002 if (Flags.isSExt()) 2003 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 2004 DAG.getValueType(ObjectVT)); 2005 else if (Flags.isZExt()) 2006 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 2007 DAG.getValueType(ObjectVT)); 2008 2009 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 2010 } 2011 2012 // Set the size that is at least reserved in caller of this function. Tail 2013 // call optimized functions' reserved stack space needs to be aligned so that 2014 // taking the difference between two stack areas will result in an aligned 2015 // stack. 2016 void 2017 PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, 2018 unsigned nAltivecParamsAtEnd, 2019 unsigned MinReservedArea, 2020 bool isPPC64) const { 2021 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2022 // Add the Altivec parameters at the end, if needed. 2023 if (nAltivecParamsAtEnd) { 2024 MinReservedArea = ((MinReservedArea+15)/16)*16; 2025 MinReservedArea += 16*nAltivecParamsAtEnd; 2026 } 2027 MinReservedArea = 2028 std::max(MinReservedArea, 2029 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2030 unsigned TargetAlign 2031 = DAG.getMachineFunction().getTarget().getFrameLowering()-> 2032 getStackAlignment(); 2033 unsigned AlignMask = TargetAlign-1; 2034 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 2035 FI->setMinReservedArea(MinReservedArea); 2036 } 2037 2038 SDValue 2039 PPCTargetLowering::LowerFormalArguments_64SVR4( 2040 SDValue Chain, 2041 CallingConv::ID CallConv, bool isVarArg, 2042 const SmallVectorImpl<ISD::InputArg> 2043 &Ins, 2044 DebugLoc dl, SelectionDAG &DAG, 2045 SmallVectorImpl<SDValue> &InVals) const { 2046 // TODO: add description of PPC stack frame format, or at least some docs. 2047 // 2048 MachineFunction &MF = DAG.getMachineFunction(); 2049 MachineFrameInfo *MFI = MF.getFrameInfo(); 2050 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2051 2052 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2053 // Potential tail calls could cause overwriting of argument stack slots. 2054 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2055 (CallConv == CallingConv::Fast)); 2056 unsigned PtrByteSize = 8; 2057 2058 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); 2059 // Area that is at least reserved in caller of this function. 2060 unsigned MinReservedArea = ArgOffset; 2061 2062 static const uint16_t GPR[] = { 2063 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2064 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2065 }; 2066 2067 static const uint16_t *FPR = GetFPR(); 2068 2069 static const uint16_t VR[] = { 2070 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2071 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2072 }; 2073 2074 const unsigned Num_GPR_Regs = array_lengthof(GPR); 2075 const unsigned Num_FPR_Regs = 13; 2076 const unsigned Num_VR_Regs = array_lengthof(VR); 2077 2078 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2079 2080 // Add DAG nodes to load the arguments or copy them out of registers. On 2081 // entry to a function on PPC, the arguments start after the linkage area, 2082 // although the first ones are often in registers. 
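// Every non-vector argument is assigned a doubleword slot in the parameter
// save area even when it is passed in a register (ArgOffset advances by 8 in
// either case); vector arguments only reserve stack space in varargs
// functions.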
2083 2084 SmallVector<SDValue, 8> MemOps; 2085 unsigned nAltivecParamsAtEnd = 0; 2086 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2087 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { 2088 SDValue ArgVal; 2089 bool needsLoad = false; 2090 EVT ObjectVT = Ins[ArgNo].VT; 2091 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 2092 unsigned ArgSize = ObjSize; 2093 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2094 2095 unsigned CurArgOffset = ArgOffset; 2096 2097 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 2098 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 2099 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 2100 if (isVarArg) { 2101 MinReservedArea = ((MinReservedArea+15)/16)*16; 2102 MinReservedArea += CalculateStackSlotSize(ObjectVT, 2103 Flags, 2104 PtrByteSize); 2105 } else 2106 nAltivecParamsAtEnd++; 2107 } else 2108 // Calculate min reserved area. 2109 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 2110 Flags, 2111 PtrByteSize); 2112 2113 // FIXME the codegen can be much improved in some cases. 2114 // We do not have to keep everything in memory. 2115 if (Flags.isByVal()) { 2116 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2117 ObjSize = Flags.getByValSize(); 2118 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2119 // Empty aggregate parameters do not take up registers. Examples: 2120 // struct { } a; 2121 // union { } b; 2122 // int c[0]; 2123 // etc. However, we have to provide a place-holder in InVals, so 2124 // pretend we have an 8-byte item at the current address for that 2125 // purpose. 2126 if (!ObjSize) { 2127 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2128 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2129 InVals.push_back(FIN); 2130 continue; 2131 } 2132 // All aggregates smaller than 8 bytes must be passed right-justified. 2133 if (ObjSize < PtrByteSize) 2134 CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); 2135 // The value of the object is its address. 2136 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2137 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2138 InVals.push_back(FIN); 2139 2140 if (ObjSize < 8) { 2141 if (GPR_idx != Num_GPR_Regs) { 2142 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2143 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2144 SDValue Store; 2145 2146 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 2147 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 2148 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 2149 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2150 MachinePointerInfo(FuncArg, CurArgOffset), 2151 ObjType, false, false, 0); 2152 } else { 2153 // For sizes that don't fit a truncating store (3, 5, 6, 7), 2154 // store the whole register as-is to the parameter save area 2155 // slot. The address of the parameter was already calculated 2156 // above (InVals.push_back(FIN)) to be the right-justified 2157 // offset within the slot. For this store, we need a new 2158 // frame index that points at the beginning of the slot. 2159 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2160 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2161 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2162 MachinePointerInfo(FuncArg, ArgOffset), 2163 false, false, 0); 2164 } 2165 2166 MemOps.push_back(Store); 2167 ++GPR_idx; 2168 } 2169 // Whether we copied from a register or not, advance the offset 2170 // into the parameter save area by a full doubleword. 
2171 ArgOffset += PtrByteSize; 2172 continue; 2173 } 2174 2175 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2176 // Store whatever pieces of the object are in registers 2177 // to memory. ArgOffset will be the address of the beginning 2178 // of the object. 2179 if (GPR_idx != Num_GPR_Regs) { 2180 unsigned VReg; 2181 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2182 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2183 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2184 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2185 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2186 MachinePointerInfo(FuncArg, ArgOffset), 2187 false, false, 0); 2188 MemOps.push_back(Store); 2189 ++GPR_idx; 2190 ArgOffset += PtrByteSize; 2191 } else { 2192 ArgOffset += ArgSize - j; 2193 break; 2194 } 2195 } 2196 continue; 2197 } 2198 2199 switch (ObjectVT.getSimpleVT().SimpleTy) { 2200 default: llvm_unreachable("Unhandled argument type!"); 2201 case MVT::i32: 2202 case MVT::i64: 2203 if (GPR_idx != Num_GPR_Regs) { 2204 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2205 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2206 2207 if (ObjectVT == MVT::i32) 2208 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2209 // value to MVT::i64 and then truncate to the correct register size. 2210 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 2211 2212 ++GPR_idx; 2213 } else { 2214 needsLoad = true; 2215 ArgSize = PtrByteSize; 2216 } 2217 ArgOffset += 8; 2218 break; 2219 2220 case MVT::f32: 2221 case MVT::f64: 2222 // Every 8 bytes of argument space consumes one of the GPRs available for 2223 // argument passing. 2224 if (GPR_idx != Num_GPR_Regs) { 2225 ++GPR_idx; 2226 } 2227 if (FPR_idx != Num_FPR_Regs) { 2228 unsigned VReg; 2229 2230 if (ObjectVT == MVT::f32) 2231 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2232 else 2233 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2234 2235 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2236 ++FPR_idx; 2237 } else { 2238 needsLoad = true; 2239 ArgSize = PtrByteSize; 2240 } 2241 2242 ArgOffset += 8; 2243 break; 2244 case MVT::v4f32: 2245 case MVT::v4i32: 2246 case MVT::v8i16: 2247 case MVT::v16i8: 2248 // Note that vector arguments in registers don't reserve stack space, 2249 // except in varargs functions. 2250 if (VR_idx != Num_VR_Regs) { 2251 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2252 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2253 if (isVarArg) { 2254 while ((ArgOffset % 16) != 0) { 2255 ArgOffset += PtrByteSize; 2256 if (GPR_idx != Num_GPR_Regs) 2257 GPR_idx++; 2258 } 2259 ArgOffset += 16; 2260 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2261 } 2262 ++VR_idx; 2263 } else { 2264 // Vectors are aligned. 2265 ArgOffset = ((ArgOffset+15)/16)*16; 2266 CurArgOffset = ArgOffset; 2267 ArgOffset += 16; 2268 needsLoad = true; 2269 } 2270 break; 2271 } 2272 2273 // We need to load the argument to a virtual register if we determined 2274 // above that we ran out of physical registers of the appropriate type. 
2275 if (needsLoad) { 2276 int FI = MFI->CreateFixedObject(ObjSize, 2277 CurArgOffset + (ArgSize - ObjSize), 2278 isImmutable); 2279 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2280 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2281 false, false, false, 0); 2282 } 2283 2284 InVals.push_back(ArgVal); 2285 } 2286 2287 // Set the size that is at least reserved in caller of this function. Tail 2288 // call optimized functions' reserved stack space needs to be aligned so that 2289 // taking the difference between two stack areas will result in an aligned 2290 // stack. 2291 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true); 2292 2293 // If the function takes variable number of arguments, make a frame index for 2294 // the start of the first vararg value... for expansion of llvm.va_start. 2295 if (isVarArg) { 2296 int Depth = ArgOffset; 2297 2298 FuncInfo->setVarArgsFrameIndex( 2299 MFI->CreateFixedObject(PtrByteSize, Depth, true)); 2300 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2301 2302 // If this function is vararg, store any remaining integer argument regs 2303 // to their spots on the stack so that they may be loaded by deferencing the 2304 // result of va_next. 2305 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2306 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2307 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2308 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2309 MachinePointerInfo(), false, false, 0); 2310 MemOps.push_back(Store); 2311 // Increment the address by four for the next argument to store 2312 SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT); 2313 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2314 } 2315 } 2316 2317 if (!MemOps.empty()) 2318 Chain = DAG.getNode(ISD::TokenFactor, dl, 2319 MVT::Other, &MemOps[0], MemOps.size()); 2320 2321 return Chain; 2322 } 2323 2324 SDValue 2325 PPCTargetLowering::LowerFormalArguments_Darwin( 2326 SDValue Chain, 2327 CallingConv::ID CallConv, bool isVarArg, 2328 const SmallVectorImpl<ISD::InputArg> 2329 &Ins, 2330 DebugLoc dl, SelectionDAG &DAG, 2331 SmallVectorImpl<SDValue> &InVals) const { 2332 // TODO: add description of PPC stack frame format, or at least some docs. 2333 // 2334 MachineFunction &MF = DAG.getMachineFunction(); 2335 MachineFrameInfo *MFI = MF.getFrameInfo(); 2336 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2337 2338 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2339 bool isPPC64 = PtrVT == MVT::i64; 2340 // Potential tail calls could cause overwriting of argument stack slots. 2341 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2342 (CallConv == CallingConv::Fast)); 2343 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2344 2345 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 2346 // Area that is at least reserved in caller of this function. 2347 unsigned MinReservedArea = ArgOffset; 2348 2349 static const uint16_t GPR_32[] = { // 32-bit registers. 2350 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2351 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2352 }; 2353 static const uint16_t GPR_64[] = { // 64-bit registers. 
2354 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2355 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2356 }; 2357 2358 static const uint16_t *FPR = GetFPR(); 2359 2360 static const uint16_t VR[] = { 2361 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2362 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2363 }; 2364 2365 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 2366 const unsigned Num_FPR_Regs = 13; 2367 const unsigned Num_VR_Regs = array_lengthof( VR); 2368 2369 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2370 2371 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 2372 2373 // In 32-bit non-varargs functions, the stack space for vectors is after the 2374 // stack space for non-vectors. We do not use this space unless we have 2375 // too many vectors to fit in registers, something that only occurs in 2376 // constructed examples:), but we have to walk the arglist to figure 2377 // that out...for the pathological case, compute VecArgOffset as the 2378 // start of the vector parameter area. Computing VecArgOffset is the 2379 // entire point of the following loop. 2380 unsigned VecArgOffset = ArgOffset; 2381 if (!isVarArg && !isPPC64) { 2382 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 2383 ++ArgNo) { 2384 EVT ObjectVT = Ins[ArgNo].VT; 2385 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2386 2387 if (Flags.isByVal()) { 2388 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 2389 unsigned ObjSize = Flags.getByValSize(); 2390 unsigned ArgSize = 2391 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2392 VecArgOffset += ArgSize; 2393 continue; 2394 } 2395 2396 switch(ObjectVT.getSimpleVT().SimpleTy) { 2397 default: llvm_unreachable("Unhandled argument type!"); 2398 case MVT::i32: 2399 case MVT::f32: 2400 VecArgOffset += 4; 2401 break; 2402 case MVT::i64: // PPC64 2403 case MVT::f64: 2404 // FIXME: We are guaranteed to be !isPPC64 at this point. 2405 // Does MVT::i64 apply? 2406 VecArgOffset += 8; 2407 break; 2408 case MVT::v4f32: 2409 case MVT::v4i32: 2410 case MVT::v8i16: 2411 case MVT::v16i8: 2412 // Nothing to do, we're only looking at Nonvector args here. 2413 break; 2414 } 2415 } 2416 } 2417 // We've found where the vector parameter area in memory is. Skip the 2418 // first 12 parameters; these don't use that memory. 2419 VecArgOffset = ((VecArgOffset+15)/16)*16; 2420 VecArgOffset += 12*16; 2421 2422 // Add DAG nodes to load the arguments or copy them out of registers. On 2423 // entry to a function on PPC, the arguments start after the linkage area, 2424 // although the first ones are often in registers. 2425 2426 SmallVector<SDValue, 8> MemOps; 2427 unsigned nAltivecParamsAtEnd = 0; 2428 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2429 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { 2430 SDValue ArgVal; 2431 bool needsLoad = false; 2432 EVT ObjectVT = Ins[ArgNo].VT; 2433 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 2434 unsigned ArgSize = ObjSize; 2435 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2436 2437 unsigned CurArgOffset = ArgOffset; 2438 2439 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 2440 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 2441 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 2442 if (isVarArg || isPPC64) { 2443 MinReservedArea = ((MinReservedArea+15)/16)*16; 2444 MinReservedArea += CalculateStackSlotSize(ObjectVT, 2445 Flags, 2446 PtrByteSize); 2447 } else nAltivecParamsAtEnd++; 2448 } else 2449 // Calculate min reserved area. 
2450 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 2451 Flags, 2452 PtrByteSize); 2453 2454 // FIXME the codegen can be much improved in some cases. 2455 // We do not have to keep everything in memory. 2456 if (Flags.isByVal()) { 2457 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2458 ObjSize = Flags.getByValSize(); 2459 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2460 // Objects of size 1 and 2 are right justified, everything else is 2461 // left justified. This means the memory address is adjusted forwards. 2462 if (ObjSize==1 || ObjSize==2) { 2463 CurArgOffset = CurArgOffset + (4 - ObjSize); 2464 } 2465 // The value of the object is its address. 2466 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2467 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2468 InVals.push_back(FIN); 2469 if (ObjSize==1 || ObjSize==2) { 2470 if (GPR_idx != Num_GPR_Regs) { 2471 unsigned VReg; 2472 if (isPPC64) 2473 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2474 else 2475 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2476 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2477 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 2478 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2479 MachinePointerInfo(FuncArg, 2480 CurArgOffset), 2481 ObjType, false, false, 0); 2482 MemOps.push_back(Store); 2483 ++GPR_idx; 2484 } 2485 2486 ArgOffset += PtrByteSize; 2487 2488 continue; 2489 } 2490 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2491 // Store whatever pieces of the object are in registers 2492 // to memory. ArgOffset will be the address of the beginning 2493 // of the object. 2494 if (GPR_idx != Num_GPR_Regs) { 2495 unsigned VReg; 2496 if (isPPC64) 2497 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2498 else 2499 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2500 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2501 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2502 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2503 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2504 MachinePointerInfo(FuncArg, ArgOffset), 2505 false, false, 0); 2506 MemOps.push_back(Store); 2507 ++GPR_idx; 2508 ArgOffset += PtrByteSize; 2509 } else { 2510 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 2511 break; 2512 } 2513 } 2514 continue; 2515 } 2516 2517 switch (ObjectVT.getSimpleVT().SimpleTy) { 2518 default: llvm_unreachable("Unhandled argument type!"); 2519 case MVT::i32: 2520 if (!isPPC64) { 2521 if (GPR_idx != Num_GPR_Regs) { 2522 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2523 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2524 ++GPR_idx; 2525 } else { 2526 needsLoad = true; 2527 ArgSize = PtrByteSize; 2528 } 2529 // All int arguments reserve stack space in the Darwin ABI. 2530 ArgOffset += PtrByteSize; 2531 break; 2532 } 2533 // FALLTHROUGH 2534 case MVT::i64: // PPC64 2535 if (GPR_idx != Num_GPR_Regs) { 2536 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2537 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2538 2539 if (ObjectVT == MVT::i32) 2540 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2541 // value to MVT::i64 and then truncate to the correct register size. 2542 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 2543 2544 ++GPR_idx; 2545 } else { 2546 needsLoad = true; 2547 ArgSize = PtrByteSize; 2548 } 2549 // All int arguments reserve stack space in the Darwin ABI. 
2550 ArgOffset += 8; 2551 break; 2552 2553 case MVT::f32: 2554 case MVT::f64: 2555 // Every 4 bytes of argument space consumes one of the GPRs available for 2556 // argument passing. 2557 if (GPR_idx != Num_GPR_Regs) { 2558 ++GPR_idx; 2559 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 2560 ++GPR_idx; 2561 } 2562 if (FPR_idx != Num_FPR_Regs) { 2563 unsigned VReg; 2564 2565 if (ObjectVT == MVT::f32) 2566 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2567 else 2568 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2569 2570 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2571 ++FPR_idx; 2572 } else { 2573 needsLoad = true; 2574 } 2575 2576 // All FP arguments reserve stack space in the Darwin ABI. 2577 ArgOffset += isPPC64 ? 8 : ObjSize; 2578 break; 2579 case MVT::v4f32: 2580 case MVT::v4i32: 2581 case MVT::v8i16: 2582 case MVT::v16i8: 2583 // Note that vector arguments in registers don't reserve stack space, 2584 // except in varargs functions. 2585 if (VR_idx != Num_VR_Regs) { 2586 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2587 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2588 if (isVarArg) { 2589 while ((ArgOffset % 16) != 0) { 2590 ArgOffset += PtrByteSize; 2591 if (GPR_idx != Num_GPR_Regs) 2592 GPR_idx++; 2593 } 2594 ArgOffset += 16; 2595 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2596 } 2597 ++VR_idx; 2598 } else { 2599 if (!isVarArg && !isPPC64) { 2600 // Vectors go after all the nonvectors. 2601 CurArgOffset = VecArgOffset; 2602 VecArgOffset += 16; 2603 } else { 2604 // Vectors are aligned. 2605 ArgOffset = ((ArgOffset+15)/16)*16; 2606 CurArgOffset = ArgOffset; 2607 ArgOffset += 16; 2608 } 2609 needsLoad = true; 2610 } 2611 break; 2612 } 2613 2614 // We need to load the argument to a virtual register if we determined above 2615 // that we ran out of physical registers of the appropriate type. 2616 if (needsLoad) { 2617 int FI = MFI->CreateFixedObject(ObjSize, 2618 CurArgOffset + (ArgSize - ObjSize), 2619 isImmutable); 2620 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2621 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2622 false, false, false, 0); 2623 } 2624 2625 InVals.push_back(ArgVal); 2626 } 2627 2628 // Set the size that is at least reserved in caller of this function. Tail 2629 // call optimized functions' reserved stack space needs to be aligned so that 2630 // taking the difference between two stack areas will result in an aligned 2631 // stack. 2632 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64); 2633 2634 // If the function takes variable number of arguments, make a frame index for 2635 // the start of the first vararg value... for expansion of llvm.va_start. 2636 if (isVarArg) { 2637 int Depth = ArgOffset; 2638 2639 FuncInfo->setVarArgsFrameIndex( 2640 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2641 Depth, true)); 2642 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2643 2644 // If this function is vararg, store any remaining integer argument regs 2645 // to their spots on the stack so that they may be loaded by deferencing the 2646 // result of va_next. 
2647 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2648 unsigned VReg; 2649 2650 if (isPPC64) 2651 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2652 else 2653 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2654 2655 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2656 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2657 MachinePointerInfo(), false, false, 0); 2658 MemOps.push_back(Store); 2659 // Increment the address by four for the next argument to store 2660 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2661 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2662 } 2663 } 2664 2665 if (!MemOps.empty()) 2666 Chain = DAG.getNode(ISD::TokenFactor, dl, 2667 MVT::Other, &MemOps[0], MemOps.size()); 2668 2669 return Chain; 2670 } 2671 2672 /// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus 2673 /// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. 2674 static unsigned 2675 CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, 2676 bool isPPC64, 2677 bool isVarArg, 2678 unsigned CC, 2679 const SmallVectorImpl<ISD::OutputArg> 2680 &Outs, 2681 const SmallVectorImpl<SDValue> &OutVals, 2682 unsigned &nAltivecParamsAtEnd) { 2683 // Count how many bytes are to be pushed on the stack, including the linkage 2684 // area, and parameter passing area. We start with 24/48 bytes, which is 2685 // prereserved space for [SP][CR][LR][3 x unused]. 2686 unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); 2687 unsigned NumOps = Outs.size(); 2688 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2689 2690 // Add up all the space actually used. 2691 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 2692 // they all go in registers, but we must reserve stack space for them for 2693 // possible use by the caller. In varargs or 64-bit calls, parameters are 2694 // assigned stack space in order, with padding so Altivec parameters are 2695 // 16-byte aligned. 2696 nAltivecParamsAtEnd = 0; 2697 for (unsigned i = 0; i != NumOps; ++i) { 2698 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2699 EVT ArgVT = Outs[i].VT; 2700 // Varargs Altivec parameters are padded to a 16 byte boundary. 2701 if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || 2702 ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { 2703 if (!isVarArg && !isPPC64) { 2704 // Non-varargs Altivec parameters go after all the non-Altivec 2705 // parameters; handle those later so we know how much padding we need. 2706 nAltivecParamsAtEnd++; 2707 continue; 2708 } 2709 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 2710 NumBytes = ((NumBytes+15)/16)*16; 2711 } 2712 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2713 } 2714 2715 // Allow for Altivec parameters at the end, if needed. 2716 if (nAltivecParamsAtEnd) { 2717 NumBytes = ((NumBytes+15)/16)*16; 2718 NumBytes += 16*nAltivecParamsAtEnd; 2719 } 2720 2721 // The prolog code of the callee may store up to 8 GPR argument registers to 2722 // the stack, allowing va_start to index over them in memory if its varargs. 2723 // Because we cannot tell if this is needed on the caller side, we have to 2724 // conservatively assume that it is needed. As such, make sure we have at 2725 // least enough stack space for the caller to store the 8 GPRs. 2726 NumBytes = std::max(NumBytes, 2727 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2728 2729 // Tail call needs the stack to be aligned. 
2730 if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ 2731 unsigned TargetAlign = DAG.getMachineFunction().getTarget(). 2732 getFrameLowering()->getStackAlignment(); 2733 unsigned AlignMask = TargetAlign-1; 2734 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2735 } 2736 2737 return NumBytes; 2738 } 2739 2740 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 2741 /// adjusted to accommodate the arguments for the tailcall. 2742 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 2743 unsigned ParamSize) { 2744 2745 if (!isTailCall) return 0; 2746 2747 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 2748 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 2749 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 2750 // Remember only if the new adjustement is bigger. 2751 if (SPDiff < FI->getTailCallSPDelta()) 2752 FI->setTailCallSPDelta(SPDiff); 2753 2754 return SPDiff; 2755 } 2756 2757 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 2758 /// for tail call optimization. Targets which want to do tail call 2759 /// optimization should implement this function. 2760 bool 2761 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 2762 CallingConv::ID CalleeCC, 2763 bool isVarArg, 2764 const SmallVectorImpl<ISD::InputArg> &Ins, 2765 SelectionDAG& DAG) const { 2766 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 2767 return false; 2768 2769 // Variable argument functions are not supported. 2770 if (isVarArg) 2771 return false; 2772 2773 MachineFunction &MF = DAG.getMachineFunction(); 2774 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 2775 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 2776 // Functions containing by val parameters are not supported. 2777 for (unsigned i = 0; i != Ins.size(); i++) { 2778 ISD::ArgFlagsTy Flags = Ins[i].Flags; 2779 if (Flags.isByVal()) return false; 2780 } 2781 2782 // Non PIC/GOT tail calls are supported. 2783 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 2784 return true; 2785 2786 // At the moment we can only do local tail calls (in same module, hidden 2787 // or protected) if we are generating PIC. 2788 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 2789 return G->getGlobal()->hasHiddenVisibility() 2790 || G->getGlobal()->hasProtectedVisibility(); 2791 } 2792 2793 return false; 2794 } 2795 2796 /// isCallCompatibleAddress - Return the immediate to use if the specified 2797 /// 32-bit value is representable in the immediate field of a BxA instruction. 2798 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 2799 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 2800 if (!C) return 0; 2801 2802 int Addr = C->getZExtValue(); 2803 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 2804 SignExtend32<26>(Addr) != Addr) 2805 return 0; // Top 6 bits have to be sext of immediate. 2806 2807 return DAG.getConstant((int)C->getZExtValue() >> 2, 2808 DAG.getTargetLoweringInfo().getPointerTy()).getNode(); 2809 } 2810 2811 namespace { 2812 2813 struct TailCallArgumentInfo { 2814 SDValue Arg; 2815 SDValue FrameIdxOp; 2816 int FrameIdx; 2817 2818 TailCallArgumentInfo() : FrameIdx(0) {} 2819 }; 2820 2821 } 2822 2823 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 
2824 static void 2825 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 2826 SDValue Chain, 2827 const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs, 2828 SmallVector<SDValue, 8> &MemOpChains, 2829 DebugLoc dl) { 2830 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 2831 SDValue Arg = TailCallArgs[i].Arg; 2832 SDValue FIN = TailCallArgs[i].FrameIdxOp; 2833 int FI = TailCallArgs[i].FrameIdx; 2834 // Store relative to framepointer. 2835 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, 2836 MachinePointerInfo::getFixedStack(FI), 2837 false, false, 0)); 2838 } 2839 } 2840 2841 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 2842 /// the appropriate stack slot for the tail call optimized function call. 2843 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 2844 MachineFunction &MF, 2845 SDValue Chain, 2846 SDValue OldRetAddr, 2847 SDValue OldFP, 2848 int SPDiff, 2849 bool isPPC64, 2850 bool isDarwinABI, 2851 DebugLoc dl) { 2852 if (SPDiff) { 2853 // Calculate the new stack slot for the return address. 2854 int SlotSize = isPPC64 ? 8 : 4; 2855 int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, 2856 isDarwinABI); 2857 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 2858 NewRetAddrLoc, true); 2859 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 2860 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 2861 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 2862 MachinePointerInfo::getFixedStack(NewRetAddr), 2863 false, false, 0); 2864 2865 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 2866 // slot as the FP is never overwritten. 2867 if (isDarwinABI) { 2868 int NewFPLoc = 2869 SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); 2870 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 2871 true); 2872 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 2873 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 2874 MachinePointerInfo::getFixedStack(NewFPIdx), 2875 false, false, 0); 2876 } 2877 } 2878 return Chain; 2879 } 2880 2881 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 2882 /// the position of the argument. 2883 static void 2884 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 2885 SDValue Arg, int SPDiff, unsigned ArgOffset, 2886 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { 2887 int Offset = ArgOffset + SPDiff; 2888 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 2889 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 2890 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 2891 SDValue FIN = DAG.getFrameIndex(FI, VT); 2892 TailCallArgumentInfo Info; 2893 Info.Arg = Arg; 2894 Info.FrameIdxOp = FIN; 2895 Info.FrameIdx = FI; 2896 TailCallArguments.push_back(Info); 2897 } 2898 2899 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 2900 /// stack slot. Returns the chain as result and the loaded frame pointers in 2901 /// LROpOut/FPOpout. Used when tail calling. 2902 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 2903 int SPDiff, 2904 SDValue Chain, 2905 SDValue &LROpOut, 2906 SDValue &FPOpOut, 2907 bool isDarwinABI, 2908 DebugLoc dl) const { 2909 if (SPDiff) { 2910 // Load the LR and FP stack slot for later adjusting. 2911 EVT VT = PPCSubTarget.isPPC64() ? 
MVT::i64 : MVT::i32; 2912 LROpOut = getReturnAddrFrameIndex(DAG); 2913 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 2914 false, false, false, 0); 2915 Chain = SDValue(LROpOut.getNode(), 1); 2916 2917 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 2918 // slot as the FP is never overwritten. 2919 if (isDarwinABI) { 2920 FPOpOut = getFramePointerFrameIndex(DAG); 2921 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 2922 false, false, false, 0); 2923 Chain = SDValue(FPOpOut.getNode(), 1); 2924 } 2925 } 2926 return Chain; 2927 } 2928 2929 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 2930 /// by "Src" to address "Dst" of size "Size". Alignment information is 2931 /// specified by the specific parameter attribute. The copy will be passed as 2932 /// a byval function parameter. 2933 /// Sometimes what we are copying is the end of a larger object, the part that 2934 /// does not fit in registers. 2935 static SDValue 2936 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 2937 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 2938 DebugLoc dl) { 2939 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 2940 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 2941 false, false, MachinePointerInfo(0), 2942 MachinePointerInfo(0)); 2943 } 2944 2945 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 2946 /// tail calls. 2947 static void 2948 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 2949 SDValue Arg, SDValue PtrOff, int SPDiff, 2950 unsigned ArgOffset, bool isPPC64, bool isTailCall, 2951 bool isVector, SmallVector<SDValue, 8> &MemOpChains, 2952 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments, 2953 DebugLoc dl) { 2954 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2955 if (!isTailCall) { 2956 if (isVector) { 2957 SDValue StackPtr; 2958 if (isPPC64) 2959 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 2960 else 2961 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 2962 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 2963 DAG.getConstant(ArgOffset, PtrVT)); 2964 } 2965 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 2966 MachinePointerInfo(), false, false, 0)); 2967 // Calculate and remember argument location. 2968 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 2969 TailCallArguments); 2970 } 2971 2972 static 2973 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 2974 DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 2975 SDValue LROp, SDValue FPOp, bool isDarwinABI, 2976 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) { 2977 MachineFunction &MF = DAG.getMachineFunction(); 2978 2979 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 2980 // might overwrite each other in case of tail call optimization. 2981 SmallVector<SDValue, 8> MemOpChains2; 2982 // Do not flag preceding copytoreg stuff together with the following stuff. 2983 InFlag = SDValue(); 2984 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 2985 MemOpChains2, dl); 2986 if (!MemOpChains2.empty()) 2987 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2988 &MemOpChains2[0], MemOpChains2.size()); 2989 2990 // Store the return address to the appropriate stack slot. 
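  // The return address (and, for the Darwin ABI, the frame pointer) that was
  // loaded earlier by EmitTailCallLoadFPAndRetAddr is re-stored SPDiff bytes
  // away, so the tail-called function finds it in its expected slot.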
2991 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 2992 isPPC64, isDarwinABI, dl); 2993 2994 // Emit callseq_end just before tailcall node. 2995 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 2996 DAG.getIntPtrConstant(0, true), InFlag); 2997 InFlag = Chain.getValue(1); 2998 } 2999 3000 static 3001 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 3002 SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, 3003 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, 3004 SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys, 3005 const PPCSubtarget &PPCSubTarget) { 3006 3007 bool isPPC64 = PPCSubTarget.isPPC64(); 3008 bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); 3009 3010 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3011 NodeTys.push_back(MVT::Other); // Returns a chain 3012 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 3013 3014 unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; 3015 3016 bool needIndirectCall = true; 3017 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 3018 // If this is an absolute destination address, use the munged value. 3019 Callee = SDValue(Dest, 0); 3020 needIndirectCall = false; 3021 } 3022 3023 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3024 // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 3025 // Use indirect calls for ALL functions calls in JIT mode, since the 3026 // far-call stubs may be outside relocation limits for a BL instruction. 3027 if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { 3028 unsigned OpFlags = 0; 3029 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 3030 (PPCSubTarget.getTargetTriple().isMacOSX() && 3031 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 3032 (G->getGlobal()->isDeclaration() || 3033 G->getGlobal()->isWeakForLinker())) { 3034 // PC-relative references to external symbols should go through $stub, 3035 // unless we're building with the leopard linker or later, which 3036 // automatically synthesizes these stubs. 3037 OpFlags = PPCII::MO_DARWIN_STUB; 3038 } 3039 3040 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 3041 // every direct call is) turn it into a TargetGlobalAddress / 3042 // TargetExternalSymbol node so that legalize doesn't hack it. 3043 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 3044 Callee.getValueType(), 3045 0, OpFlags); 3046 needIndirectCall = false; 3047 } 3048 } 3049 3050 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3051 unsigned char OpFlags = 0; 3052 3053 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 3054 (PPCSubTarget.getTargetTriple().isMacOSX() && 3055 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { 3056 // PC-relative references to external symbols should go through $stub, 3057 // unless we're building with the leopard linker or later, which 3058 // automatically synthesizes these stubs. 3059 OpFlags = PPCII::MO_DARWIN_STUB; 3060 } 3061 3062 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 3063 OpFlags); 3064 needIndirectCall = false; 3065 } 3066 3067 if (needIndirectCall) { 3068 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 3069 // to do the call, we can't use PPCISD::CALL. 
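    // The sequence ultimately emitted for the indirect case is roughly:
    //   mtctr <callee address>
    //   bctrl
    // (or a bctr for a tail call; see the CTR register operand added below).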
3070 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 3071 3072 if (isSVR4ABI && isPPC64) { 3073 // Function pointers in the 64-bit SVR4 ABI do not point to the function 3074 // entry point, but to the function descriptor (the function entry point 3075 // address is part of the function descriptor though). 3076 // The function descriptor is a three doubleword structure with the 3077 // following fields: function entry point, TOC base address and 3078 // environment pointer. 3079 // Thus for a call through a function pointer, the following actions need 3080 // to be performed: 3081 // 1. Save the TOC of the caller in the TOC save area of its stack 3082 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 3083 // 2. Load the address of the function entry point from the function 3084 // descriptor. 3085 // 3. Load the TOC of the callee from the function descriptor into r2. 3086 // 4. Load the environment pointer from the function descriptor into 3087 // r11. 3088 // 5. Branch to the function entry point address. 3089 // 6. On return of the callee, the TOC of the caller needs to be 3090 // restored (this is done in FinishCall()). 3091 // 3092 // All those operations are flagged together to ensure that no other 3093 // operations can be scheduled in between. E.g. without flagging the 3094 // operations together, a TOC access in the caller could be scheduled 3095 // between the load of the callee TOC and the branch to the callee, which 3096 // results in the TOC access going through the TOC of the callee instead 3097 // of going through the TOC of the caller, which leads to incorrect code. 3098 3099 // Load the address of the function entry point from the function 3100 // descriptor. 3101 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); 3102 SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, 3103 InFlag.getNode() ? 3 : 2); 3104 Chain = LoadFuncPtr.getValue(1); 3105 InFlag = LoadFuncPtr.getValue(2); 3106 3107 // Load environment pointer into r11. 3108 // Offset of the environment pointer within the function descriptor. 3109 SDValue PtrOff = DAG.getIntPtrConstant(16); 3110 3111 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 3112 SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, 3113 InFlag); 3114 Chain = LoadEnvPtr.getValue(1); 3115 InFlag = LoadEnvPtr.getValue(2); 3116 3117 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 3118 InFlag); 3119 Chain = EnvVal.getValue(0); 3120 InFlag = EnvVal.getValue(1); 3121 3122 // Load TOC of the callee into r2. We are using a target-specific load 3123 // with r2 hard coded, because the result of a target-independent load 3124 // would never go directly into r2, since r2 is a reserved register (which 3125 // prevents the register allocator from allocating it), resulting in an 3126 // additional register being allocated and an unnecessary move instruction 3127 // being generated. 
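      // Laid out as a C struct, the 64-bit SVR4 function descriptor is
      // roughly:
      //   struct FuncDesc {
      //     void *EntryPoint; // offset 0:  loaded by LoadFuncPtr above
      //     void *TOCBase;    // offset 8:  loaded into r2 below
      //     void *EnvPtr;     // offset 16: loaded into r11 above
      //   };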
3128 VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3129 SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, 3130 Callee, InFlag); 3131 Chain = LoadTOCPtr.getValue(0); 3132 InFlag = LoadTOCPtr.getValue(1); 3133 3134 MTCTROps[0] = Chain; 3135 MTCTROps[1] = LoadFuncPtr; 3136 MTCTROps[2] = InFlag; 3137 } 3138 3139 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, 3140 2 + (InFlag.getNode() != 0)); 3141 InFlag = Chain.getValue(1); 3142 3143 NodeTys.clear(); 3144 NodeTys.push_back(MVT::Other); 3145 NodeTys.push_back(MVT::Glue); 3146 Ops.push_back(Chain); 3147 CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; 3148 Callee.setNode(0); 3149 // Add CTR register as callee so a bctr can be emitted later. 3150 if (isTailCall) 3151 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 3152 } 3153 3154 // If this is a direct call, pass the chain and the callee. 3155 if (Callee.getNode()) { 3156 Ops.push_back(Chain); 3157 Ops.push_back(Callee); 3158 } 3159 // If this is a tail call add stack pointer delta. 3160 if (isTailCall) 3161 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 3162 3163 // Add argument registers to the end of the list so that they are known live 3164 // into the call. 3165 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 3166 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 3167 RegsToPass[i].second.getValueType())); 3168 3169 return CallOpc; 3170 } 3171 3172 static 3173 bool isLocalCall(const SDValue &Callee) 3174 { 3175 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3176 return !G->getGlobal()->isDeclaration() && 3177 !G->getGlobal()->isWeakForLinker(); 3178 return false; 3179 } 3180 3181 SDValue 3182 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 3183 CallingConv::ID CallConv, bool isVarArg, 3184 const SmallVectorImpl<ISD::InputArg> &Ins, 3185 DebugLoc dl, SelectionDAG &DAG, 3186 SmallVectorImpl<SDValue> &InVals) const { 3187 3188 SmallVector<CCValAssign, 16> RVLocs; 3189 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3190 getTargetMachine(), RVLocs, *DAG.getContext()); 3191 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 3192 3193 // Copy all of the result registers out of their specified physreg. 
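  // Each CopyFromReg is threaded through the chain and the glue value produced
  // by the call node so that the copies are emitted immediately after the call;
  // values that were extended by the calling convention are truncated back to
  // their declared type below.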
3194 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 3195 CCValAssign &VA = RVLocs[i]; 3196 assert(VA.isRegLoc() && "Can only return in registers!"); 3197 3198 SDValue Val = DAG.getCopyFromReg(Chain, dl, 3199 VA.getLocReg(), VA.getLocVT(), InFlag); 3200 Chain = Val.getValue(1); 3201 InFlag = Val.getValue(2); 3202 3203 switch (VA.getLocInfo()) { 3204 default: llvm_unreachable("Unknown loc info!"); 3205 case CCValAssign::Full: break; 3206 case CCValAssign::AExt: 3207 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3208 break; 3209 case CCValAssign::ZExt: 3210 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 3211 DAG.getValueType(VA.getValVT())); 3212 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3213 break; 3214 case CCValAssign::SExt: 3215 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 3216 DAG.getValueType(VA.getValVT())); 3217 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3218 break; 3219 } 3220 3221 InVals.push_back(Val); 3222 } 3223 3224 return Chain; 3225 } 3226 3227 SDValue 3228 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, 3229 bool isTailCall, bool isVarArg, 3230 SelectionDAG &DAG, 3231 SmallVector<std::pair<unsigned, SDValue>, 8> 3232 &RegsToPass, 3233 SDValue InFlag, SDValue Chain, 3234 SDValue &Callee, 3235 int SPDiff, unsigned NumBytes, 3236 const SmallVectorImpl<ISD::InputArg> &Ins, 3237 SmallVectorImpl<SDValue> &InVals) const { 3238 std::vector<EVT> NodeTys; 3239 SmallVector<SDValue, 8> Ops; 3240 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, 3241 isTailCall, RegsToPass, Ops, NodeTys, 3242 PPCSubTarget); 3243 3244 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 3245 if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) 3246 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 3247 3248 // When performing tail call optimization the callee pops its arguments off 3249 // the stack. Account for this here so these bytes can be pushed back on in 3250 // PPCRegisterInfo::eliminateCallFramePseudoInstr. 3251 int BytesCalleePops = 3252 (CallConv == CallingConv::Fast && 3253 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 3254 3255 // Add a register mask operand representing the call-preserved registers. 3256 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 3257 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 3258 assert(Mask && "Missing call preserved mask for calling convention"); 3259 Ops.push_back(DAG.getRegisterMask(Mask)); 3260 3261 if (InFlag.getNode()) 3262 Ops.push_back(InFlag); 3263 3264 // Emit tail call. 3265 if (isTailCall) { 3266 // If this is the first return lowered for this function, add the regs 3267 // to the liveout set for the function. 
    if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
      SmallVector<CCValAssign, 16> RVLocs;
      CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                     getTargetMachine(), RVLocs, *DAG.getContext());
      CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
      for (unsigned i = 0; i != RVLocs.size(); ++i)
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
    }

    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
           "Expecting a global address, external symbol, absolute value or register");

    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
  }

  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged.

  bool needsTOCRestore = false;
  if (!isTailCall && PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    if (CallOpc == PPCISD::BCTRL_SVR4) {
      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      needsTOCRestore = true;
    } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
      // Otherwise insert NOP for non-local calls.
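      // Concretely, a non-local direct call then looks roughly like:
      //   bl <callee>
      //   nop          ; may be rewritten to "ld r2, 40(r1)" at link time
      // where 40(r1) is the TOC save slot of the caller's stack frame.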
3311 CallOpc = PPCISD::CALL_NOP_SVR4; 3312 } 3313 } 3314 3315 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 3316 InFlag = Chain.getValue(1); 3317 3318 if (needsTOCRestore) { 3319 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3320 Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); 3321 InFlag = Chain.getValue(1); 3322 } 3323 3324 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3325 DAG.getIntPtrConstant(BytesCalleePops, true), 3326 InFlag); 3327 if (!Ins.empty()) 3328 InFlag = Chain.getValue(1); 3329 3330 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 3331 Ins, dl, DAG, InVals); 3332 } 3333 3334 SDValue 3335 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 3336 SmallVectorImpl<SDValue> &InVals) const { 3337 SelectionDAG &DAG = CLI.DAG; 3338 DebugLoc &dl = CLI.DL; 3339 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; 3340 SmallVector<SDValue, 32> &OutVals = CLI.OutVals; 3341 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; 3342 SDValue Chain = CLI.Chain; 3343 SDValue Callee = CLI.Callee; 3344 bool &isTailCall = CLI.IsTailCall; 3345 CallingConv::ID CallConv = CLI.CallConv; 3346 bool isVarArg = CLI.IsVarArg; 3347 3348 if (isTailCall) 3349 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 3350 Ins, DAG); 3351 3352 if (PPCSubTarget.isSVR4ABI()) { 3353 if (PPCSubTarget.isPPC64()) 3354 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 3355 isTailCall, Outs, OutVals, Ins, 3356 dl, DAG, InVals); 3357 else 3358 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 3359 isTailCall, Outs, OutVals, Ins, 3360 dl, DAG, InVals); 3361 } 3362 3363 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 3364 isTailCall, Outs, OutVals, Ins, 3365 dl, DAG, InVals); 3366 } 3367 3368 SDValue 3369 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, 3370 CallingConv::ID CallConv, bool isVarArg, 3371 bool isTailCall, 3372 const SmallVectorImpl<ISD::OutputArg> &Outs, 3373 const SmallVectorImpl<SDValue> &OutVals, 3374 const SmallVectorImpl<ISD::InputArg> &Ins, 3375 DebugLoc dl, SelectionDAG &DAG, 3376 SmallVectorImpl<SDValue> &InVals) const { 3377 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 3378 // of the 32-bit SVR4 ABI stack frame layout. 3379 3380 assert((CallConv == CallingConv::C || 3381 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 3382 3383 unsigned PtrByteSize = 4; 3384 3385 MachineFunction &MF = DAG.getMachineFunction(); 3386 3387 // Mark this function as potentially containing a function that contains a 3388 // tail call. As a consequence the frame pointer will be used for dynamicalloc 3389 // and restoring the callers stack pointer in this functions epilog. This is 3390 // done because by tail calling the called function might overwrite the value 3391 // in this function's (MF) stack pointer stack slot 0(SP). 3392 if (getTargetMachine().Options.GuaranteedTailCallOpt && 3393 CallConv == CallingConv::Fast) 3394 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 3395 3396 // Count how many bytes are to be pushed on the stack, including the linkage 3397 // area, parameter list area and the part of the local variable space which 3398 // contains copies of aggregates which are passed by value. 3399 3400 // Assign locations to all of the outgoing arguments. 
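  // Each outgoing argument either receives a physical register (collected in
  // RegsToPass below) or a byte offset into the parameter area of the caller's
  // frame (written through MemOpChains).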
3401 SmallVector<CCValAssign, 16> ArgLocs; 3402 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3403 getTargetMachine(), ArgLocs, *DAG.getContext()); 3404 3405 // Reserve space for the linkage area on the stack. 3406 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); 3407 3408 if (isVarArg) { 3409 // Handle fixed and variable vector arguments differently. 3410 // Fixed vector arguments go into registers as long as registers are 3411 // available. Variable vector arguments always go into memory. 3412 unsigned NumArgs = Outs.size(); 3413 3414 for (unsigned i = 0; i != NumArgs; ++i) { 3415 MVT ArgVT = Outs[i].VT; 3416 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3417 bool Result; 3418 3419 if (Outs[i].IsFixed) { 3420 Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 3421 CCInfo); 3422 } else { 3423 Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 3424 ArgFlags, CCInfo); 3425 } 3426 3427 if (Result) { 3428 #ifndef NDEBUG 3429 errs() << "Call operand #" << i << " has unhandled type " 3430 << EVT(ArgVT).getEVTString() << "\n"; 3431 #endif 3432 llvm_unreachable(0); 3433 } 3434 } 3435 } else { 3436 // All arguments are treated the same. 3437 CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4); 3438 } 3439 3440 // Assign locations to all of the outgoing aggregate by value arguments. 3441 SmallVector<CCValAssign, 16> ByValArgLocs; 3442 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3443 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 3444 3445 // Reserve stack space for the allocations in CCInfo. 3446 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 3447 3448 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal); 3449 3450 // Size of the linkage area, parameter list area and the part of the local 3451 // space variable where copies of aggregates which are passed by value are 3452 // stored. 3453 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 3454 3455 // Calculate by how many bytes the stack has to be adjusted in case of tail 3456 // call optimization. 3457 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 3458 3459 // Adjust the stack pointer for the new arguments... 3460 // These operations are automatically eliminated by the prolog/epilog pass 3461 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 3462 SDValue CallSeqStart = Chain; 3463 3464 // Load the return address and frame pointer so it can be moved somewhere else 3465 // later. 3466 SDValue LROp, FPOp; 3467 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, 3468 dl); 3469 3470 // Set up a copy of the stack pointer for use loading and storing any 3471 // arguments that may not fit in the registers available for argument 3472 // passing. 3473 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3474 3475 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 3476 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 3477 SmallVector<SDValue, 8> MemOpChains; 3478 3479 bool seenFloatArg = false; 3480 // Walk the register/memloc assignments, inserting copies/loads. 
3481 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 3482 i != e; 3483 ++i) { 3484 CCValAssign &VA = ArgLocs[i]; 3485 SDValue Arg = OutVals[i]; 3486 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3487 3488 if (Flags.isByVal()) { 3489 // Argument is an aggregate which is passed by value, thus we need to 3490 // create a copy of it in the local variable space of the current stack 3491 // frame (which is the stack frame of the caller) and pass the address of 3492 // this copy to the callee. 3493 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 3494 CCValAssign &ByValVA = ByValArgLocs[j++]; 3495 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 3496 3497 // Memory reserved in the local variable space of the callers stack frame. 3498 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 3499 3500 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 3501 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 3502 3503 // Create a copy of the argument in the local area of the current 3504 // stack frame. 3505 SDValue MemcpyCall = 3506 CreateCopyOfByValArgument(Arg, PtrOff, 3507 CallSeqStart.getNode()->getOperand(0), 3508 Flags, DAG, dl); 3509 3510 // This must go outside the CALLSEQ_START..END. 3511 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 3512 CallSeqStart.getNode()->getOperand(1)); 3513 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 3514 NewCallSeqStart.getNode()); 3515 Chain = CallSeqStart = NewCallSeqStart; 3516 3517 // Pass the address of the aggregate copy on the stack either in a 3518 // physical register or in the parameter list area of the current stack 3519 // frame to the callee. 3520 Arg = PtrOff; 3521 } 3522 3523 if (VA.isRegLoc()) { 3524 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 3525 // Put argument in a physical register. 3526 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 3527 } else { 3528 // Put argument in the parameter list area of the current stack frame. 3529 assert(VA.isMemLoc()); 3530 unsigned LocMemOffset = VA.getLocMemOffset(); 3531 3532 if (!isTailCall) { 3533 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 3534 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 3535 3536 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3537 MachinePointerInfo(), 3538 false, false, 0)); 3539 } else { 3540 // Calculate and remember argument location. 3541 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 3542 TailCallArguments); 3543 } 3544 } 3545 } 3546 3547 if (!MemOpChains.empty()) 3548 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 3549 &MemOpChains[0], MemOpChains.size()); 3550 3551 // Build a sequence of copy-to-reg nodes chained together with token chain 3552 // and flag operands which copy the outgoing args into the appropriate regs. 3553 SDValue InFlag; 3554 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 3555 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 3556 RegsToPass[i].second, InFlag); 3557 InFlag = Chain.getValue(1); 3558 } 3559 3560 // Set CR bit 6 to true if this is a vararg call with floating args passed in 3561 // registers. 3562 if (isVarArg) { 3563 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3564 SDValue Ops[] = { Chain, InFlag }; 3565 3566 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 3567 dl, VTs, Ops, InFlag.getNode() ? 
2 : 1); 3568 3569 InFlag = Chain.getValue(1); 3570 } 3571 3572 if (isTailCall) 3573 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, 3574 false, TailCallArguments); 3575 3576 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 3577 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 3578 Ins, InVals); 3579 } 3580 3581 // Copy an argument into memory, being careful to do this outside the 3582 // call sequence for the call to which the argument belongs. 3583 SDValue 3584 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, 3585 SDValue CallSeqStart, 3586 ISD::ArgFlagsTy Flags, 3587 SelectionDAG &DAG, 3588 DebugLoc dl) const { 3589 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 3590 CallSeqStart.getNode()->getOperand(0), 3591 Flags, DAG, dl); 3592 // The MEMCPY must go outside the CALLSEQ_START..END. 3593 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 3594 CallSeqStart.getNode()->getOperand(1)); 3595 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 3596 NewCallSeqStart.getNode()); 3597 return NewCallSeqStart; 3598 } 3599 3600 SDValue 3601 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, 3602 CallingConv::ID CallConv, bool isVarArg, 3603 bool isTailCall, 3604 const SmallVectorImpl<ISD::OutputArg> &Outs, 3605 const SmallVectorImpl<SDValue> &OutVals, 3606 const SmallVectorImpl<ISD::InputArg> &Ins, 3607 DebugLoc dl, SelectionDAG &DAG, 3608 SmallVectorImpl<SDValue> &InVals) const { 3609 3610 unsigned NumOps = Outs.size(); 3611 3612 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3613 unsigned PtrByteSize = 8; 3614 3615 MachineFunction &MF = DAG.getMachineFunction(); 3616 3617 // Mark this function as potentially containing a function that contains a 3618 // tail call. As a consequence the frame pointer will be used for dynamicalloc 3619 // and restoring the callers stack pointer in this functions epilog. This is 3620 // done because by tail calling the called function might overwrite the value 3621 // in this function's (MF) stack pointer stack slot 0(SP). 3622 if (getTargetMachine().Options.GuaranteedTailCallOpt && 3623 CallConv == CallingConv::Fast) 3624 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 3625 3626 unsigned nAltivecParamsAtEnd = 0; 3627 3628 // Count how many bytes are to be pushed on the stack, including the linkage 3629 // area, and parameter passing area. We start with at least 48 bytes, which 3630 // is reserved space for [SP][CR][LR][3 x unused]. 3631 // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result 3632 // of this call. 3633 unsigned NumBytes = 3634 CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv, 3635 Outs, OutVals, nAltivecParamsAtEnd); 3636 3637 // Calculate by how many bytes the stack has to be adjusted in case of tail 3638 // call optimization. 3639 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 3640 3641 // To protect arguments on the stack from being clobbered in a tail call, 3642 // force all the loads to happen before doing any other lowering. 3643 if (isTailCall) 3644 Chain = DAG.getStackArgumentTokenFactor(Chain); 3645 3646 // Adjust the stack pointer for the new arguments... 3647 // These operations are automatically eliminated by the prolog/epilog pass 3648 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 3649 SDValue CallSeqStart = Chain; 3650 3651 // Load the return address and frame pointer so it can be move somewhere else 3652 // later. 
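  // These loads are no-ops unless SPDiff is non-zero, i.e. unless this is a
  // guaranteed tail call whose argument area differs in size from the caller's
  // reserved area.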
3653 SDValue LROp, FPOp; 3654 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 3655 dl); 3656 3657 // Set up a copy of the stack pointer for use loading and storing any 3658 // arguments that may not fit in the registers available for argument 3659 // passing. 3660 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3661 3662 // Figure out which arguments are going to go in registers, and which in 3663 // memory. Also, if this is a vararg function, floating point operations 3664 // must be stored to our stack, and loaded into integer regs as well, if 3665 // any integer regs are available for argument passing. 3666 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); 3667 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3668 3669 static const uint16_t GPR[] = { 3670 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3671 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3672 }; 3673 static const uint16_t *FPR = GetFPR(); 3674 3675 static const uint16_t VR[] = { 3676 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3677 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3678 }; 3679 const unsigned NumGPRs = array_lengthof(GPR); 3680 const unsigned NumFPRs = 13; 3681 const unsigned NumVRs = array_lengthof(VR); 3682 3683 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 3684 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 3685 3686 SmallVector<SDValue, 8> MemOpChains; 3687 for (unsigned i = 0; i != NumOps; ++i) { 3688 SDValue Arg = OutVals[i]; 3689 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3690 3691 // PtrOff will be used to store the current argument to the stack if a 3692 // register cannot be found for it. 3693 SDValue PtrOff; 3694 3695 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 3696 3697 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 3698 3699 // Promote integers to 64-bit values. 3700 if (Arg.getValueType() == MVT::i32) { 3701 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 3702 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 3703 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 3704 } 3705 3706 // FIXME memcpy is used way more than necessary. Correctness first. 3707 // Note: "by value" is code for passing a structure by value, not 3708 // basic types. 3709 if (Flags.isByVal()) { 3710 // Note: Size includes alignment padding, so 3711 // struct x { short a; char b; } 3712 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 3713 // These are the proper values we need for right-justifying the 3714 // aggregate in a parameter register. 3715 unsigned Size = Flags.getByValSize(); 3716 3717 // An empty aggregate parameter takes up no storage and no 3718 // registers. 3719 if (Size == 0) 3720 continue; 3721 3722 // All aggregates smaller than 8 bytes must be passed right-justified. 3723 if (Size==1 || Size==2 || Size==4) { 3724 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? 
MVT::i16 : MVT::i32); 3725 if (GPR_idx != NumGPRs) { 3726 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 3727 MachinePointerInfo(), VT, 3728 false, false, 0); 3729 MemOpChains.push_back(Load.getValue(1)); 3730 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3731 3732 ArgOffset += PtrByteSize; 3733 continue; 3734 } 3735 } 3736 3737 if (GPR_idx == NumGPRs && Size < 8) { 3738 SDValue Const = DAG.getConstant(PtrByteSize - Size, 3739 PtrOff.getValueType()); 3740 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 3741 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 3742 CallSeqStart, 3743 Flags, DAG, dl); 3744 ArgOffset += PtrByteSize; 3745 continue; 3746 } 3747 // Copy entire object into memory. There are cases where gcc-generated 3748 // code assumes it is there, even if it could be put entirely into 3749 // registers. (This is not what the doc says.) 3750 3751 // FIXME: The above statement is likely due to a misunderstanding of the 3752 // documents. All arguments must be copied into the parameter area BY 3753 // THE CALLEE in the event that the callee takes the address of any 3754 // formal argument. That has not yet been implemented. However, it is 3755 // reasonable to use the stack area as a staging area for the register 3756 // load. 3757 3758 // Skip this for small aggregates, as we will use the same slot for a 3759 // right-justified copy, below. 3760 if (Size >= 8) 3761 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 3762 CallSeqStart, 3763 Flags, DAG, dl); 3764 3765 // When a register is available, pass a small aggregate right-justified. 3766 if (Size < 8 && GPR_idx != NumGPRs) { 3767 // The easiest way to get this right-justified in a register 3768 // is to copy the structure into the rightmost portion of a 3769 // local variable slot, then load the whole slot into the 3770 // register. 3771 // FIXME: The memcpy seems to produce pretty awful code for 3772 // small aggregates, particularly for packed ones. 3773 // FIXME: It would be preferable to use the slot in the 3774 // parameter save area instead of a new local variable. 3775 SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); 3776 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 3777 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 3778 CallSeqStart, 3779 Flags, DAG, dl); 3780 3781 // Load the slot into the register. 3782 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, 3783 MachinePointerInfo(), 3784 false, false, false, 0); 3785 MemOpChains.push_back(Load.getValue(1)); 3786 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3787 3788 // Done with this argument. 3789 ArgOffset += PtrByteSize; 3790 continue; 3791 } 3792 3793 // For aggregates larger than PtrByteSize, copy the pieces of the 3794 // object that fit into registers from the parameter save area. 
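      // E.g. a 24-byte aggregate with three free GPRs occupies three GPRs,
      // loaded one doubleword at a time; once the GPRs run out, the remaining
      // doublewords are left in the parameter save area copy made above and
      // ArgOffset is advanced past them.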
3795 for (unsigned j=0; j<Size; j+=PtrByteSize) { 3796 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 3797 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 3798 if (GPR_idx != NumGPRs) { 3799 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 3800 MachinePointerInfo(), 3801 false, false, false, 0); 3802 MemOpChains.push_back(Load.getValue(1)); 3803 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3804 ArgOffset += PtrByteSize; 3805 } else { 3806 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 3807 break; 3808 } 3809 } 3810 continue; 3811 } 3812 3813 switch (Arg.getValueType().getSimpleVT().SimpleTy) { 3814 default: llvm_unreachable("Unexpected ValueType for argument!"); 3815 case MVT::i32: 3816 case MVT::i64: 3817 if (GPR_idx != NumGPRs) { 3818 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 3819 } else { 3820 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 3821 true, isTailCall, false, MemOpChains, 3822 TailCallArguments, dl); 3823 } 3824 ArgOffset += PtrByteSize; 3825 break; 3826 case MVT::f32: 3827 case MVT::f64: 3828 if (FPR_idx != NumFPRs) { 3829 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 3830 3831 if (isVarArg) { 3832 // A single float or an aggregate containing only a single float 3833 // must be passed right-justified in the stack doubleword, and 3834 // in the GPR, if one is available. 3835 SDValue StoreOff; 3836 if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) { 3837 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 3838 StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 3839 } else 3840 StoreOff = PtrOff; 3841 3842 SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff, 3843 MachinePointerInfo(), false, false, 0); 3844 MemOpChains.push_back(Store); 3845 3846 // Float varargs are always shadowed in available integer registers 3847 if (GPR_idx != NumGPRs) { 3848 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 3849 MachinePointerInfo(), false, false, 3850 false, 0); 3851 MemOpChains.push_back(Load.getValue(1)); 3852 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3853 } 3854 } else if (GPR_idx != NumGPRs) 3855 // If we have any FPRs remaining, we may also have GPRs remaining. 3856 ++GPR_idx; 3857 } else { 3858 // Single-precision floating-point values are mapped to the 3859 // second (rightmost) word of the stack doubleword. 3860 if (Arg.getValueType() == MVT::f32) { 3861 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 3862 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 3863 } 3864 3865 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 3866 true, isTailCall, false, MemOpChains, 3867 TailCallArguments, dl); 3868 } 3869 ArgOffset += 8; 3870 break; 3871 case MVT::v4f32: 3872 case MVT::v4i32: 3873 case MVT::v8i16: 3874 case MVT::v16i8: 3875 if (isVarArg) { 3876 // These go aligned on the stack, or in the corresponding R registers 3877 // when within range. The Darwin PPC ABI doc claims they also go in 3878 // V registers; in fact gcc does this only for arguments that are 3879 // prototyped, not for those that match the ... We do it for all 3880 // arguments, seems to work. 3881 while (ArgOffset % 16 !=0) { 3882 ArgOffset += PtrByteSize; 3883 if (GPR_idx != NumGPRs) 3884 GPR_idx++; 3885 } 3886 // We could elide this store in the case where the object fits 3887 // entirely in R registers. Maybe later. 
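        // The vector is stored to its 16-byte aligned stack slot and then
        // reloaded both into a VR (if one is free) and, doubleword by
        // doubleword, into any remaining GPRs, since a varargs callee may
        // pick it up from either place.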
3888 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 3889 DAG.getConstant(ArgOffset, PtrVT)); 3890 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 3891 MachinePointerInfo(), false, false, 0); 3892 MemOpChains.push_back(Store); 3893 if (VR_idx != NumVRs) { 3894 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 3895 MachinePointerInfo(), 3896 false, false, false, 0); 3897 MemOpChains.push_back(Load.getValue(1)); 3898 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 3899 } 3900 ArgOffset += 16; 3901 for (unsigned i=0; i<16; i+=PtrByteSize) { 3902 if (GPR_idx == NumGPRs) 3903 break; 3904 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 3905 DAG.getConstant(i, PtrVT)); 3906 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 3907 false, false, false, 0); 3908 MemOpChains.push_back(Load.getValue(1)); 3909 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3910 } 3911 break; 3912 } 3913 3914 // Non-varargs Altivec params generally go in registers, but have 3915 // stack space allocated at the end. 3916 if (VR_idx != NumVRs) { 3917 // Doesn't have GPR space allocated. 3918 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 3919 } else { 3920 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 3921 true, isTailCall, true, MemOpChains, 3922 TailCallArguments, dl); 3923 ArgOffset += 16; 3924 } 3925 break; 3926 } 3927 } 3928 3929 if (!MemOpChains.empty()) 3930 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 3931 &MemOpChains[0], MemOpChains.size()); 3932 3933 // Check if this is an indirect call (MTCTR/BCTRL). 3934 // See PrepareCall() for more information about calls through function 3935 // pointers in the 64-bit SVR4 ABI. 3936 if (!isTailCall && 3937 !dyn_cast<GlobalAddressSDNode>(Callee) && 3938 !dyn_cast<ExternalSymbolSDNode>(Callee) && 3939 !isBLACompatibleAddress(Callee, DAG)) { 3940 // Load r2 into a virtual register and store it to the TOC save area. 3941 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 3942 // TOC save area offset. 3943 SDValue PtrOff = DAG.getIntPtrConstant(40); 3944 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 3945 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), 3946 false, false, 0); 3947 // R12 must contain the address of an indirect callee. This does not 3948 // mean the MTCTR instruction must use R12; it's easier to model this 3949 // as an extra parameter, so do that. 3950 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 3951 } 3952 3953 // Build a sequence of copy-to-reg nodes chained together with token chain 3954 // and flag operands which copy the outgoing args into the appropriate regs. 
3955 SDValue InFlag; 3956 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 3957 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 3958 RegsToPass[i].second, InFlag); 3959 InFlag = Chain.getValue(1); 3960 } 3961 3962 if (isTailCall) 3963 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, 3964 FPOp, true, TailCallArguments); 3965 3966 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 3967 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 3968 Ins, InVals); 3969 } 3970 3971 SDValue 3972 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, 3973 CallingConv::ID CallConv, bool isVarArg, 3974 bool isTailCall, 3975 const SmallVectorImpl<ISD::OutputArg> &Outs, 3976 const SmallVectorImpl<SDValue> &OutVals, 3977 const SmallVectorImpl<ISD::InputArg> &Ins, 3978 DebugLoc dl, SelectionDAG &DAG, 3979 SmallVectorImpl<SDValue> &InVals) const { 3980 3981 unsigned NumOps = Outs.size(); 3982 3983 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3984 bool isPPC64 = PtrVT == MVT::i64; 3985 unsigned PtrByteSize = isPPC64 ? 8 : 4; 3986 3987 MachineFunction &MF = DAG.getMachineFunction(); 3988 3989 // Mark this function as potentially containing a function that contains a 3990 // tail call. As a consequence the frame pointer will be used for dynamicalloc 3991 // and restoring the callers stack pointer in this functions epilog. This is 3992 // done because by tail calling the called function might overwrite the value 3993 // in this function's (MF) stack pointer stack slot 0(SP). 3994 if (getTargetMachine().Options.GuaranteedTailCallOpt && 3995 CallConv == CallingConv::Fast) 3996 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 3997 3998 unsigned nAltivecParamsAtEnd = 0; 3999 4000 // Count how many bytes are to be pushed on the stack, including the linkage 4001 // area, and parameter passing area. We start with 24/48 bytes, which is 4002 // prereserved space for [SP][CR][LR][3 x unused]. 4003 unsigned NumBytes = 4004 CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, 4005 Outs, OutVals, 4006 nAltivecParamsAtEnd); 4007 4008 // Calculate by how many bytes the stack has to be adjusted in case of tail 4009 // call optimization. 4010 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4011 4012 // To protect arguments on the stack from being clobbered in a tail call, 4013 // force all the loads to happen before doing any other lowering. 4014 if (isTailCall) 4015 Chain = DAG.getStackArgumentTokenFactor(Chain); 4016 4017 // Adjust the stack pointer for the new arguments... 4018 // These operations are automatically eliminated by the prolog/epilog pass 4019 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 4020 SDValue CallSeqStart = Chain; 4021 4022 // Load the return address and frame pointer so it can be move somewhere else 4023 // later. 4024 SDValue LROp, FPOp; 4025 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4026 dl); 4027 4028 // Set up a copy of the stack pointer for use loading and storing any 4029 // arguments that may not fit in the registers available for argument 4030 // passing. 4031 SDValue StackPtr; 4032 if (isPPC64) 4033 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4034 else 4035 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4036 4037 // Figure out which arguments are going to go in registers, and which in 4038 // memory. 
Also, if this is a vararg function, floating point operations 4039 // must be stored to our stack, and loaded into integer regs as well, if 4040 // any integer regs are available for argument passing. 4041 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 4042 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4043 4044 static const uint16_t GPR_32[] = { // 32-bit registers. 4045 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 4046 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 4047 }; 4048 static const uint16_t GPR_64[] = { // 64-bit registers. 4049 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4050 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4051 }; 4052 static const uint16_t *FPR = GetFPR(); 4053 4054 static const uint16_t VR[] = { 4055 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4056 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4057 }; 4058 const unsigned NumGPRs = array_lengthof(GPR_32); 4059 const unsigned NumFPRs = 13; 4060 const unsigned NumVRs = array_lengthof(VR); 4061 4062 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 4063 4064 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4065 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4066 4067 SmallVector<SDValue, 8> MemOpChains; 4068 for (unsigned i = 0; i != NumOps; ++i) { 4069 SDValue Arg = OutVals[i]; 4070 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4071 4072 // PtrOff will be used to store the current argument to the stack if a 4073 // register cannot be found for it. 4074 SDValue PtrOff; 4075 4076 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4077 4078 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4079 4080 // On PPC64, promote integers to 64-bit values. 4081 if (isPPC64 && Arg.getValueType() == MVT::i32) { 4082 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4083 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4084 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4085 } 4086 4087 // FIXME memcpy is used way more than necessary. Correctness first. 4088 // Note: "by value" is code for passing a structure by value, not 4089 // basic types. 4090 if (Flags.isByVal()) { 4091 unsigned Size = Flags.getByValSize(); 4092 // Very small objects are passed right-justified. Everything else is 4093 // passed left-justified. 4094 if (Size==1 || Size==2) { 4095 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 4096 if (GPR_idx != NumGPRs) { 4097 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4098 MachinePointerInfo(), VT, 4099 false, false, 0); 4100 MemOpChains.push_back(Load.getValue(1)); 4101 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4102 4103 ArgOffset += PtrByteSize; 4104 } else { 4105 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4106 PtrOff.getValueType()); 4107 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4108 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4109 CallSeqStart, 4110 Flags, DAG, dl); 4111 ArgOffset += PtrByteSize; 4112 } 4113 continue; 4114 } 4115 // Copy entire object into memory. There are cases where gcc-generated 4116 // code assumes it is there, even if it could be put entirely into 4117 // registers. (This is not what the doc says.) 4118 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4119 CallSeqStart, 4120 Flags, DAG, dl); 4121 4122 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 4123 // copy the pieces of the object that fit into registers from the 4124 // parameter save area. 
4125 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4126 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4127 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4128 if (GPR_idx != NumGPRs) { 4129 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4130 MachinePointerInfo(), 4131 false, false, false, 0); 4132 MemOpChains.push_back(Load.getValue(1)); 4133 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4134 ArgOffset += PtrByteSize; 4135 } else { 4136 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4137 break; 4138 } 4139 } 4140 continue; 4141 } 4142 4143 switch (Arg.getValueType().getSimpleVT().SimpleTy) { 4144 default: llvm_unreachable("Unexpected ValueType for argument!"); 4145 case MVT::i32: 4146 case MVT::i64: 4147 if (GPR_idx != NumGPRs) { 4148 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4149 } else { 4150 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4151 isPPC64, isTailCall, false, MemOpChains, 4152 TailCallArguments, dl); 4153 } 4154 ArgOffset += PtrByteSize; 4155 break; 4156 case MVT::f32: 4157 case MVT::f64: 4158 if (FPR_idx != NumFPRs) { 4159 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4160 4161 if (isVarArg) { 4162 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4163 MachinePointerInfo(), false, false, 0); 4164 MemOpChains.push_back(Store); 4165 4166 // Float varargs are always shadowed in available integer registers 4167 if (GPR_idx != NumGPRs) { 4168 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4169 MachinePointerInfo(), false, false, 4170 false, 0); 4171 MemOpChains.push_back(Load.getValue(1)); 4172 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4173 } 4174 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 4175 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4176 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4177 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4178 MachinePointerInfo(), 4179 false, false, false, 0); 4180 MemOpChains.push_back(Load.getValue(1)); 4181 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4182 } 4183 } else { 4184 // If we have any FPRs remaining, we may also have GPRs remaining. 4185 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 4186 // GPRs. 4187 if (GPR_idx != NumGPRs) 4188 ++GPR_idx; 4189 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 4190 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 4191 ++GPR_idx; 4192 } 4193 } else 4194 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4195 isPPC64, isTailCall, false, MemOpChains, 4196 TailCallArguments, dl); 4197 if (isPPC64) 4198 ArgOffset += 8; 4199 else 4200 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 4201 break; 4202 case MVT::v4f32: 4203 case MVT::v4i32: 4204 case MVT::v8i16: 4205 case MVT::v16i8: 4206 if (isVarArg) { 4207 // These go aligned on the stack, or in the corresponding R registers 4208 // when within range. The Darwin PPC ABI doc claims they also go in 4209 // V registers; in fact gcc does this only for arguments that are 4210 // prototyped, not for those that match the ... We do it for all 4211 // arguments, seems to work. 4212 while (ArgOffset % 16 !=0) { 4213 ArgOffset += PtrByteSize; 4214 if (GPR_idx != NumGPRs) 4215 GPR_idx++; 4216 } 4217 // We could elide this store in the case where the object fits 4218 // entirely in R registers. Maybe later. 
4219 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4220 DAG.getConstant(ArgOffset, PtrVT)); 4221 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4222 MachinePointerInfo(), false, false, 0); 4223 MemOpChains.push_back(Store); 4224 if (VR_idx != NumVRs) { 4225 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4226 MachinePointerInfo(), 4227 false, false, false, 0); 4228 MemOpChains.push_back(Load.getValue(1)); 4229 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 4230 } 4231 ArgOffset += 16; 4232 for (unsigned i=0; i<16; i+=PtrByteSize) { 4233 if (GPR_idx == NumGPRs) 4234 break; 4235 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4236 DAG.getConstant(i, PtrVT)); 4237 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4238 false, false, false, 0); 4239 MemOpChains.push_back(Load.getValue(1)); 4240 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4241 } 4242 break; 4243 } 4244 4245 // Non-varargs Altivec params generally go in registers, but have 4246 // stack space allocated at the end. 4247 if (VR_idx != NumVRs) { 4248 // Doesn't have GPR space allocated. 4249 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 4250 } else if (nAltivecParamsAtEnd==0) { 4251 // We are emitting Altivec params in order. 4252 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4253 isPPC64, isTailCall, true, MemOpChains, 4254 TailCallArguments, dl); 4255 ArgOffset += 16; 4256 } 4257 break; 4258 } 4259 } 4260 // If all Altivec parameters fit in registers, as they usually do, 4261 // they get stack space following the non-Altivec parameters. We 4262 // don't track this here because nobody below needs it. 4263 // If there are more Altivec parameters than fit in registers emit 4264 // the stores here. 4265 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 4266 unsigned j = 0; 4267 // Offset is aligned; skip 1st 12 params which go in V registers. 4268 ArgOffset = ((ArgOffset+15)/16)*16; 4269 ArgOffset += 12*16; 4270 for (unsigned i = 0; i != NumOps; ++i) { 4271 SDValue Arg = OutVals[i]; 4272 EVT ArgType = Outs[i].VT; 4273 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 4274 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 4275 if (++j > NumVRs) { 4276 SDValue PtrOff; 4277 // We are emitting Altivec params in order. 4278 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4279 isPPC64, isTailCall, true, MemOpChains, 4280 TailCallArguments, dl); 4281 ArgOffset += 16; 4282 } 4283 } 4284 } 4285 } 4286 4287 if (!MemOpChains.empty()) 4288 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 4289 &MemOpChains[0], MemOpChains.size()); 4290 4291 // On Darwin, R12 must contain the address of an indirect callee. This does 4292 // not mean the MTCTR instruction must use R12; it's easier to model this as 4293 // an extra parameter, so do that. 4294 if (!isTailCall && 4295 !dyn_cast<GlobalAddressSDNode>(Callee) && 4296 !dyn_cast<ExternalSymbolSDNode>(Callee) && 4297 !isBLACompatibleAddress(Callee, DAG)) 4298 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 4299 PPC::R12), Callee)); 4300 4301 // Build a sequence of copy-to-reg nodes chained together with token chain 4302 // and flag operands which copy the outgoing args into the appropriate regs. 
4303 SDValue InFlag; 4304 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4305 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4306 RegsToPass[i].second, InFlag); 4307 InFlag = Chain.getValue(1); 4308 } 4309 4310 if (isTailCall) 4311 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 4312 FPOp, true, TailCallArguments); 4313 4314 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 4315 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4316 Ins, InVals); 4317 } 4318 4319 bool 4320 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 4321 MachineFunction &MF, bool isVarArg, 4322 const SmallVectorImpl<ISD::OutputArg> &Outs, 4323 LLVMContext &Context) const { 4324 SmallVector<CCValAssign, 16> RVLocs; 4325 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), 4326 RVLocs, Context); 4327 return CCInfo.CheckReturn(Outs, RetCC_PPC); 4328 } 4329 4330 SDValue 4331 PPCTargetLowering::LowerReturn(SDValue Chain, 4332 CallingConv::ID CallConv, bool isVarArg, 4333 const SmallVectorImpl<ISD::OutputArg> &Outs, 4334 const SmallVectorImpl<SDValue> &OutVals, 4335 DebugLoc dl, SelectionDAG &DAG) const { 4336 4337 SmallVector<CCValAssign, 16> RVLocs; 4338 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4339 getTargetMachine(), RVLocs, *DAG.getContext()); 4340 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 4341 4342 // If this is the first return lowered for this function, add the regs to the 4343 // liveout set for the function. 4344 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 4345 for (unsigned i = 0; i != RVLocs.size(); ++i) 4346 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 4347 } 4348 4349 SDValue Flag; 4350 4351 // Copy the result values into the output registers. 4352 for (unsigned i = 0; i != RVLocs.size(); ++i) { 4353 CCValAssign &VA = RVLocs[i]; 4354 assert(VA.isRegLoc() && "Can only return in registers!"); 4355 4356 SDValue Arg = OutVals[i]; 4357 4358 switch (VA.getLocInfo()) { 4359 default: llvm_unreachable("Unknown loc info!"); 4360 case CCValAssign::Full: break; 4361 case CCValAssign::AExt: 4362 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 4363 break; 4364 case CCValAssign::ZExt: 4365 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 4366 break; 4367 case CCValAssign::SExt: 4368 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 4369 break; 4370 } 4371 4372 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 4373 Flag = Chain.getValue(1); 4374 } 4375 4376 if (Flag.getNode()) 4377 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 4378 else 4379 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain); 4380 } 4381 4382 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, 4383 const PPCSubtarget &Subtarget) const { 4384 // When we pop the dynamic allocation we need to restore the SP link. 4385 DebugLoc dl = Op.getDebugLoc(); 4386 4387 // Get the corect type for pointers. 4388 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4389 4390 // Construct the stack pointer operand. 4391 bool isPPC64 = Subtarget.isPPC64(); 4392 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 4393 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 4394 4395 // Get the operands for the STACKRESTORE. 4396 SDValue Chain = Op.getOperand(0); 4397 SDValue SaveSP = Op.getOperand(1); 4398 4399 // Load the old link SP. 
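  // The word at 0(r1) is the back chain (the previously saved stack pointer);
  // it must be re-stored at the new stack top so the chain stays intact after
  // the dynamic area is released.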
4400 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
4401 MachinePointerInfo(),
4402 false, false, false, 0);
4403
4404 // Restore the stack pointer.
4405 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
4406
4407 // Store the old link SP.
4408 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
4409 false, false, 0);
4410 }
4411
4412
4413
4414 SDValue
4415 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
4416 MachineFunction &MF = DAG.getMachineFunction();
4417 bool isPPC64 = PPCSubTarget.isPPC64();
4418 bool isDarwinABI = PPCSubTarget.isDarwinABI();
4419 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4420
4421 // Get the current return address save index. The users of this index will be
4422 // primarily the RETURNADDR lowering.
4423 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4424 int RASI = FI->getReturnAddrSaveIndex();
4425
4426 // If the return address save index hasn't been defined yet.
4427 if (!RASI) {
4428 // Find out what the fixed offset of the return address save area is.
4429 int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
4430 // Allocate the frame index for the return address save area.
4431 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
4432 // Save the result.
4433 FI->setReturnAddrSaveIndex(RASI);
4434 }
4435 return DAG.getFrameIndex(RASI, PtrVT);
4436 }
4437
4438 SDValue
4439 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
4440 MachineFunction &MF = DAG.getMachineFunction();
4441 bool isPPC64 = PPCSubTarget.isPPC64();
4442 bool isDarwinABI = PPCSubTarget.isDarwinABI();
4443 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4444
4445 // Get the current frame pointer save index. The users of this index will be
4446 // primarily DYNALLOC instructions.
4447 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4448 int FPSI = FI->getFramePointerSaveIndex();
4449
4450 // If the frame pointer save index hasn't been defined yet.
4451 if (!FPSI) {
4452 // Find out what the fixed offset of the frame pointer save area is.
4453 int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
4454 isDarwinABI);
4455
4456 // Allocate the frame index for the frame pointer save area.
4457 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
4458 // Save the result.
4459 FI->setFramePointerSaveIndex(FPSI);
4460 }
4461 return DAG.getFrameIndex(FPSI, PtrVT);
4462 }
4463
4464 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4465 SelectionDAG &DAG,
4466 const PPCSubtarget &Subtarget) const {
4467 // Get the inputs.
4468 SDValue Chain = Op.getOperand(0);
4469 SDValue Size = Op.getOperand(1);
4470 DebugLoc dl = Op.getDebugLoc();
4471
4472 // Get the correct type for pointers.
4473 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4474 // Negate the size.
4475 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
4476 DAG.getConstant(0, PtrVT), Size);
4477 // Construct a node for the frame pointer save index.
4478 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
4479 // Build a DYNALLOC node.
4480 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
4481 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
4482 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
4483 }
4484
4485 /// LowerSELECT_CC - Lower a floating-point select_cc into the fsel
4486 /// instruction when possible.
4487 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4488 // Not FP? Not a fsel.
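// fsel selects between floating-point values based on the sign of a
// floating-point operand, so both the comparison and the select results must
// be FP; otherwise leave the SELECT_CC node for the default handling.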
4489 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
4490 !Op.getOperand(2).getValueType().isFloatingPoint())
4491 return Op;
4492
4493 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4494
4495 // Cannot handle SETEQ/SETNE.
4496 if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
4497
4498 EVT ResVT = Op.getValueType();
4499 EVT CmpVT = Op.getOperand(0).getValueType();
4500 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4501 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
4502 DebugLoc dl = Op.getDebugLoc();
4503
4504 // If the RHS of the comparison is a 0.0, we don't need to do the
4505 // subtraction at all.
4506 if (isFloatingPointZero(RHS))
4507 switch (CC) {
4508 default: break; // SETUO etc aren't handled by fsel.
4509 case ISD::SETULT:
4510 case ISD::SETLT:
4511 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
4512 case ISD::SETOGE:
4513 case ISD::SETGE:
4514 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
4515 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4516 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
4517 case ISD::SETUGT:
4518 case ISD::SETGT:
4519 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
4520 case ISD::SETOLE:
4521 case ISD::SETLE:
4522 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
4523 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4524 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
4525 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
4526 }
4527
4528 SDValue Cmp;
4529 switch (CC) {
4530 default: break; // SETUO etc aren't handled by fsel.
4531 case ISD::SETULT:
4532 case ISD::SETLT:
4533 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
4534 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
4535 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4536 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
4537 case ISD::SETOGE:
4538 case ISD::SETGE:
4539 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
4540 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
4541 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4542 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
4543 case ISD::SETUGT:
4544 case ISD::SETGT:
4545 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
4546 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
4547 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4548 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
4549 case ISD::SETOLE:
4550 case ISD::SETLE:
4551 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
4552 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
4553 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4554 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
4555 }
4556 return Op;
4557 }
4558
4559 // FIXME: Split this code up when LegalizeDAGTypes lands.
4560 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
4561 DebugLoc dl) const {
4562 assert(Op.getOperand(0).getValueType().isFloatingPoint());
4563 SDValue Src = Op.getOperand(0);
4564 if (Src.getValueType() == MVT::f32)
4565 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
4566
4567 SDValue Tmp;
4568 switch (Op.getValueType().getSimpleVT().SimpleTy) {
4569 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
4570 case MVT::i32:
4571 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ?
PPCISD::FCTIWZ : 4572 PPCISD::FCTIDZ, 4573 dl, MVT::f64, Src); 4574 break; 4575 case MVT::i64: 4576 Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src); 4577 break; 4578 } 4579 4580 // Convert the FP value to an int value through memory. 4581 SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); 4582 4583 // Emit a store to the stack slot. 4584 SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 4585 MachinePointerInfo(), false, false, 0); 4586 4587 // Result is a load from the stack slot. If loading 4 bytes, make sure to 4588 // add in a bias. 4589 if (Op.getValueType() == MVT::i32) 4590 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 4591 DAG.getConstant(4, FIPtr.getValueType())); 4592 return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(), 4593 false, false, false, 0); 4594 } 4595 4596 SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, 4597 SelectionDAG &DAG) const { 4598 DebugLoc dl = Op.getDebugLoc(); 4599 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 4600 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 4601 return SDValue(); 4602 4603 if (Op.getOperand(0).getValueType() == MVT::i64) { 4604 SDValue SINT = Op.getOperand(0); 4605 // When converting to single-precision, we actually need to convert 4606 // to double-precision first and then round to single-precision. 4607 // To avoid double-rounding effects during that operation, we have 4608 // to prepare the input operand. Bits that might be truncated when 4609 // converting to double-precision are replaced by a bit that won't 4610 // be lost at this stage, but is below the single-precision rounding 4611 // position. 4612 // 4613 // However, if -enable-unsafe-fp-math is in effect, accept double 4614 // rounding to avoid the extra overhead. 4615 if (Op.getValueType() == MVT::f32 && 4616 !DAG.getTarget().Options.UnsafeFPMath) { 4617 4618 // Twiddle input to make sure the low 11 bits are zero. (If this 4619 // is the case, we are guaranteed the value will fit into the 53 bit 4620 // mantissa of an IEEE double-precision value without rounding.) 4621 // If any of those low 11 bits were not zero originally, make sure 4622 // bit 12 (value 2048) is set instead, so that the final rounding 4623 // to single-precision gets the correct result. 4624 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 4625 SINT, DAG.getConstant(2047, MVT::i64)); 4626 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 4627 Round, DAG.getConstant(2047, MVT::i64)); 4628 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 4629 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 4630 Round, DAG.getConstant(-2048, MVT::i64)); 4631 4632 // However, we cannot use that value unconditionally: if the magnitude 4633 // of the input value is small, the bit-twiddling we did above might 4634 // end up visibly changing the output. Fortunately, in that case, we 4635 // don't need to twiddle bits since the original input will convert 4636 // exactly to double-precision floating-point already. Therefore, 4637 // construct a conditional to use the original value if the top 11 4638 // bits are all sign-bit copies, and use the rounded value computed 4639 // above otherwise. 
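// (sra SINT, 53) is 0 or -1 exactly when the top 11 bits are sign copies;
// adding 1 maps those cases to 0 or 1, so the unsigned > 1 test below selects
// the rounded value only for wide inputs.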
4640 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 4641 SINT, DAG.getConstant(53, MVT::i32)); 4642 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 4643 Cond, DAG.getConstant(1, MVT::i64)); 4644 Cond = DAG.getSetCC(dl, MVT::i32, 4645 Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); 4646 4647 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 4648 } 4649 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 4650 SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); 4651 if (Op.getValueType() == MVT::f32) 4652 FP = DAG.getNode(ISD::FP_ROUND, dl, 4653 MVT::f32, FP, DAG.getIntPtrConstant(0)); 4654 return FP; 4655 } 4656 4657 assert(Op.getOperand(0).getValueType() == MVT::i32 && 4658 "Unhandled SINT_TO_FP type in custom expander!"); 4659 // Since we only generate this in 64-bit mode, we can take advantage of 4660 // 64-bit registers. In particular, sign extend the input value into the 4661 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 4662 // then lfd it and fcfid it. 4663 MachineFunction &MF = DAG.getMachineFunction(); 4664 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 4665 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 4666 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4667 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 4668 4669 SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32, 4670 Op.getOperand(0)); 4671 4672 // STD the extended value into the stack slot. 4673 MachineMemOperand *MMO = 4674 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), 4675 MachineMemOperand::MOStore, 8, 8); 4676 SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx }; 4677 SDValue Store = 4678 DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), 4679 Ops, 4, MVT::i64, MMO); 4680 // Load the value as a double. 4681 SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), 4682 false, false, false, 0); 4683 4684 // FCFID it and return it. 
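// FCFID converts the 64-bit integer bit pattern now sitting in an FPR to
// double precision; narrow to f32 afterwards if that is the requested type.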
4685 SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
4686 if (Op.getValueType() == MVT::f32)
4687 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
4688 return FP;
4689 }
4690
4691 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4692 SelectionDAG &DAG) const {
4693 DebugLoc dl = Op.getDebugLoc();
4694 /*
4695 The rounding mode is in bits 30:31 of FPSCR, and has the following
4696 settings:
4697 00 Round to nearest
4698 01 Round to 0
4699 10 Round to +inf
4700 11 Round to -inf
4701
4702 FLT_ROUNDS, on the other hand, expects the following:
4703 -1 Undefined
4704 0 Round to 0
4705 1 Round to nearest
4706 2 Round to +inf
4707 3 Round to -inf
4708
4709 To perform the conversion, we do:
4710 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
4711 */
4712
4713 MachineFunction &MF = DAG.getMachineFunction();
4714 EVT VT = Op.getValueType();
4715 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4716 std::vector<EVT> NodeTys;
4717 SDValue MFFSreg, InFlag;
4718
4719 // Save FP Control Word to register
4720 NodeTys.push_back(MVT::f64); // return register
4721 NodeTys.push_back(MVT::Glue); // unused in this context
4722 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
4723
4724 // Save FP register to stack slot
4725 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
4726 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
4727 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
4728 StackSlot, MachinePointerInfo(), false, false,0);
4729
4730 // Load FP Control Word from low 32 bits of stack slot.
4731 SDValue Four = DAG.getConstant(4, PtrVT);
4732 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
4733 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
4734 false, false, false, 0);
4735
4736 // Transform as necessary
4737 SDValue CWD1 =
4738 DAG.getNode(ISD::AND, dl, MVT::i32,
4739 CWD, DAG.getConstant(3, MVT::i32));
4740 SDValue CWD2 =
4741 DAG.getNode(ISD::SRL, dl, MVT::i32,
4742 DAG.getNode(ISD::AND, dl, MVT::i32,
4743 DAG.getNode(ISD::XOR, dl, MVT::i32,
4744 CWD, DAG.getConstant(3, MVT::i32)),
4745 DAG.getConstant(3, MVT::i32)),
4746 DAG.getConstant(1, MVT::i32));
4747
4748 SDValue RetVal =
4749 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
4750
4751 return DAG.getNode((VT.getSizeInBits() < 16 ?
4752 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
4753 }
4754
4755 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
4756 EVT VT = Op.getValueType();
4757 unsigned BitWidth = VT.getSizeInBits();
4758 DebugLoc dl = Op.getDebugLoc();
4759 assert(Op.getNumOperands() == 3 &&
4760 VT == Op.getOperand(1).getValueType() &&
4761 "Unexpected SHL!");
4762
4763 // Expand into a bunch of logical ops. Note that these ops
4764 // depend on the PPC behavior for oversized shift amounts.
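// The expansion computes OutLo = Lo << Amt and
// OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth)),
// relying on the PPC shift nodes returning zero for amounts of BitWidth or
// more, which makes the formula correct for any Amt in [0, 2*BitWidth).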
4765 SDValue Lo = Op.getOperand(0); 4766 SDValue Hi = Op.getOperand(1); 4767 SDValue Amt = Op.getOperand(2); 4768 EVT AmtVT = Amt.getValueType(); 4769 4770 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 4771 DAG.getConstant(BitWidth, AmtVT), Amt); 4772 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 4773 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 4774 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 4775 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 4776 DAG.getConstant(-BitWidth, AmtVT)); 4777 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 4778 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 4779 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 4780 SDValue OutOps[] = { OutLo, OutHi }; 4781 return DAG.getMergeValues(OutOps, 2, dl); 4782 } 4783 4784 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 4785 EVT VT = Op.getValueType(); 4786 DebugLoc dl = Op.getDebugLoc(); 4787 unsigned BitWidth = VT.getSizeInBits(); 4788 assert(Op.getNumOperands() == 3 && 4789 VT == Op.getOperand(1).getValueType() && 4790 "Unexpected SRL!"); 4791 4792 // Expand into a bunch of logical ops. Note that these ops 4793 // depend on the PPC behavior for oversized shift amounts. 4794 SDValue Lo = Op.getOperand(0); 4795 SDValue Hi = Op.getOperand(1); 4796 SDValue Amt = Op.getOperand(2); 4797 EVT AmtVT = Amt.getValueType(); 4798 4799 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 4800 DAG.getConstant(BitWidth, AmtVT), Amt); 4801 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 4802 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 4803 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 4804 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 4805 DAG.getConstant(-BitWidth, AmtVT)); 4806 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 4807 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 4808 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 4809 SDValue OutOps[] = { OutLo, OutHi }; 4810 return DAG.getMergeValues(OutOps, 2, dl); 4811 } 4812 4813 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 4814 DebugLoc dl = Op.getDebugLoc(); 4815 EVT VT = Op.getValueType(); 4816 unsigned BitWidth = VT.getSizeInBits(); 4817 assert(Op.getNumOperands() == 3 && 4818 VT == Op.getOperand(1).getValueType() && 4819 "Unexpected SRA!"); 4820 4821 // Expand into a bunch of logical ops, followed by a select_cc. 4822 SDValue Lo = Op.getOperand(0); 4823 SDValue Hi = Op.getOperand(1); 4824 SDValue Amt = Op.getOperand(2); 4825 EVT AmtVT = Amt.getValueType(); 4826 4827 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 4828 DAG.getConstant(BitWidth, AmtVT), Amt); 4829 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 4830 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 4831 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 4832 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 4833 DAG.getConstant(-BitWidth, AmtVT)); 4834 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 4835 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 4836 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), 4837 Tmp4, Tmp6, ISD::SETLE); 4838 SDValue OutOps[] = { OutLo, OutHi }; 4839 return DAG.getMergeValues(OutOps, 2, dl); 4840 } 4841 4842 //===----------------------------------------------------------------------===// 4843 // Vector related lowering. 
4844 // 4845 4846 /// BuildSplatI - Build a canonical splati of Val with an element size of 4847 /// SplatSize. Cast the result to VT. 4848 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 4849 SelectionDAG &DAG, DebugLoc dl) { 4850 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 4851 4852 static const EVT VTys[] = { // canonical VT to use for each size. 4853 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 4854 }; 4855 4856 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 4857 4858 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 4859 if (Val == -1) 4860 SplatSize = 1; 4861 4862 EVT CanonicalVT = VTys[SplatSize-1]; 4863 4864 // Build a canonical splat for this value. 4865 SDValue Elt = DAG.getConstant(Val, MVT::i32); 4866 SmallVector<SDValue, 8> Ops; 4867 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 4868 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, 4869 &Ops[0], Ops.size()); 4870 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); 4871 } 4872 4873 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 4874 /// specified intrinsic ID. 4875 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 4876 SelectionDAG &DAG, DebugLoc dl, 4877 EVT DestVT = MVT::Other) { 4878 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 4879 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 4880 DAG.getConstant(IID, MVT::i32), LHS, RHS); 4881 } 4882 4883 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 4884 /// specified intrinsic ID. 4885 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 4886 SDValue Op2, SelectionDAG &DAG, 4887 DebugLoc dl, EVT DestVT = MVT::Other) { 4888 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 4889 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 4890 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 4891 } 4892 4893 4894 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 4895 /// amount. The result has the specified value type. 4896 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 4897 EVT VT, SelectionDAG &DAG, DebugLoc dl) { 4898 // Force LHS/RHS to be the right type. 4899 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 4900 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 4901 4902 int Ops[16]; 4903 for (unsigned i = 0; i != 16; ++i) 4904 Ops[i] = i + Amt; 4905 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 4906 return DAG.getNode(ISD::BITCAST, dl, VT, T); 4907 } 4908 4909 // If this is a case we can't handle, return null and let the default 4910 // expansion code take care of it. If we CAN select this case, and if it 4911 // selects to a single instruction, return Op. Otherwise, if we can codegen 4912 // this case more efficiently than a constant pool load, lower it to the 4913 // sequence of ops that should be used. 4914 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 4915 SelectionDAG &DAG) const { 4916 DebugLoc dl = Op.getDebugLoc(); 4917 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 4918 assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 4919 4920 // Check if this is a splat of a constant value. 4921 APInt APSplatBits, APSplatUndef; 4922 unsigned SplatBitSize; 4923 bool HasAnyUndefs; 4924 if (! 
BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 4925 HasAnyUndefs, 0, true) || SplatBitSize > 32) 4926 return SDValue(); 4927 4928 unsigned SplatBits = APSplatBits.getZExtValue(); 4929 unsigned SplatUndef = APSplatUndef.getZExtValue(); 4930 unsigned SplatSize = SplatBitSize / 8; 4931 4932 // First, handle single instruction cases. 4933 4934 // All zeros? 4935 if (SplatBits == 0) { 4936 // Canonicalize all zero vectors to be v4i32. 4937 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 4938 SDValue Z = DAG.getConstant(0, MVT::i32); 4939 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 4940 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 4941 } 4942 return Op; 4943 } 4944 4945 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 4946 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 4947 (32-SplatBitSize)); 4948 if (SextVal >= -16 && SextVal <= 15) 4949 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 4950 4951 4952 // Two instruction sequences. 4953 4954 // If this value is in the range [-32,30] and is even, use: 4955 // tmp = VSPLTI[bhw], result = add tmp, tmp 4956 if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { 4957 SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); 4958 Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); 4959 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 4960 } 4961 4962 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 4963 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 4964 // for fneg/fabs. 4965 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 4966 // Make -1 and vspltisw -1: 4967 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 4968 4969 // Make the VSLW intrinsic, computing 0x8000_0000. 4970 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 4971 OnesV, DAG, dl); 4972 4973 // xor by OnesV to invert it. 4974 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 4975 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 4976 } 4977 4978 // Check to see if this is a wide variety of vsplti*, binop self cases. 4979 static const signed char SplatCsts[] = { 4980 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 4981 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 4982 }; 4983 4984 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 4985 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 4986 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 4987 int i = SplatCsts[idx]; 4988 4989 // Figure out what shift amount will be used by altivec if shifted by i in 4990 // this splat size. 4991 unsigned TypeShiftAmt = i & (SplatBitSize-1); 4992 4993 // vsplti + shl self. 4994 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 4995 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 4996 static const unsigned IIDs[] = { // Intrinsic to use for each size. 4997 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 4998 Intrinsic::ppc_altivec_vslw 4999 }; 5000 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5001 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5002 } 5003 5004 // vsplti + srl self. 5005 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5006 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5007 static const unsigned IIDs[] = { // Intrinsic to use for each size. 
5008 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 5009 Intrinsic::ppc_altivec_vsrw 5010 }; 5011 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5012 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5013 } 5014 5015 // vsplti + sra self. 5016 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5017 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5018 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5019 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 5020 Intrinsic::ppc_altivec_vsraw 5021 }; 5022 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5023 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5024 } 5025 5026 // vsplti + rol self. 5027 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 5028 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 5029 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5030 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5031 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 5032 Intrinsic::ppc_altivec_vrlw 5033 }; 5034 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5035 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5036 } 5037 5038 // t = vsplti c, result = vsldoi t, t, 1 5039 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 5040 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5041 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 5042 } 5043 // t = vsplti c, result = vsldoi t, t, 2 5044 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 5045 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5046 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 5047 } 5048 // t = vsplti c, result = vsldoi t, t, 3 5049 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 5050 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5051 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 5052 } 5053 } 5054 5055 // Three instruction sequences. 5056 5057 // Odd, in range [17,31]: (vsplti C)-(vsplti -16). 5058 if (SextVal >= 0 && SextVal <= 31) { 5059 SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); 5060 SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); 5061 LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); 5062 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); 5063 } 5064 // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). 5065 if (SextVal >= -31 && SextVal <= 0) { 5066 SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); 5067 SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); 5068 LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); 5069 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); 5070 } 5071 5072 return SDValue(); 5073 } 5074 5075 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 5076 /// the specified operations to build the shuffle. 
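/// Each PFEntry encodes the cost in bits 31:30, the operation in bits 29:26,
/// and the perfect-shuffle table indices of the left and right operands in
/// bits 25:13 and 12:0 respectively.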
5077 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 5078 SDValue RHS, SelectionDAG &DAG, 5079 DebugLoc dl) { 5080 unsigned OpNum = (PFEntry >> 26) & 0x0F; 5081 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 5082 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 5083 5084 enum { 5085 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 5086 OP_VMRGHW, 5087 OP_VMRGLW, 5088 OP_VSPLTISW0, 5089 OP_VSPLTISW1, 5090 OP_VSPLTISW2, 5091 OP_VSPLTISW3, 5092 OP_VSLDOI4, 5093 OP_VSLDOI8, 5094 OP_VSLDOI12 5095 }; 5096 5097 if (OpNum == OP_COPY) { 5098 if (LHSID == (1*9+2)*9+3) return LHS; 5099 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 5100 return RHS; 5101 } 5102 5103 SDValue OpLHS, OpRHS; 5104 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 5105 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 5106 5107 int ShufIdxs[16]; 5108 switch (OpNum) { 5109 default: llvm_unreachable("Unknown i32 permute!"); 5110 case OP_VMRGHW: 5111 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 5112 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 5113 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 5114 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 5115 break; 5116 case OP_VMRGLW: 5117 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 5118 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 5119 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 5120 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 5121 break; 5122 case OP_VSPLTISW0: 5123 for (unsigned i = 0; i != 16; ++i) 5124 ShufIdxs[i] = (i&3)+0; 5125 break; 5126 case OP_VSPLTISW1: 5127 for (unsigned i = 0; i != 16; ++i) 5128 ShufIdxs[i] = (i&3)+4; 5129 break; 5130 case OP_VSPLTISW2: 5131 for (unsigned i = 0; i != 16; ++i) 5132 ShufIdxs[i] = (i&3)+8; 5133 break; 5134 case OP_VSPLTISW3: 5135 for (unsigned i = 0; i != 16; ++i) 5136 ShufIdxs[i] = (i&3)+12; 5137 break; 5138 case OP_VSLDOI4: 5139 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 5140 case OP_VSLDOI8: 5141 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 5142 case OP_VSLDOI12: 5143 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 5144 } 5145 EVT VT = OpLHS.getValueType(); 5146 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 5147 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 5148 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 5149 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5150 } 5151 5152 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 5153 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 5154 /// return the code it can be lowered into. Worst case, it can always be 5155 /// lowered into a vperm. 5156 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 5157 SelectionDAG &DAG) const { 5158 DebugLoc dl = Op.getDebugLoc(); 5159 SDValue V1 = Op.getOperand(0); 5160 SDValue V2 = Op.getOperand(1); 5161 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 5162 EVT VT = Op.getValueType(); 5163 5164 // Cases that are handled by instructions that take permute immediates 5165 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 5166 // selected by the instruction selector. 
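// Passing 'true' to the VPKU*, VSLDOI and VMRG predicates requests their
// single-operand (unary) form, which is usable here because V2 is undef.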
5167 if (V2.getOpcode() == ISD::UNDEF) { 5168 if (PPC::isSplatShuffleMask(SVOp, 1) || 5169 PPC::isSplatShuffleMask(SVOp, 2) || 5170 PPC::isSplatShuffleMask(SVOp, 4) || 5171 PPC::isVPKUWUMShuffleMask(SVOp, true) || 5172 PPC::isVPKUHUMShuffleMask(SVOp, true) || 5173 PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || 5174 PPC::isVMRGLShuffleMask(SVOp, 1, true) || 5175 PPC::isVMRGLShuffleMask(SVOp, 2, true) || 5176 PPC::isVMRGLShuffleMask(SVOp, 4, true) || 5177 PPC::isVMRGHShuffleMask(SVOp, 1, true) || 5178 PPC::isVMRGHShuffleMask(SVOp, 2, true) || 5179 PPC::isVMRGHShuffleMask(SVOp, 4, true)) { 5180 return Op; 5181 } 5182 } 5183 5184 // Altivec has a variety of "shuffle immediates" that take two vector inputs 5185 // and produce a fixed permutation. If any of these match, do not lower to 5186 // VPERM. 5187 if (PPC::isVPKUWUMShuffleMask(SVOp, false) || 5188 PPC::isVPKUHUMShuffleMask(SVOp, false) || 5189 PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || 5190 PPC::isVMRGLShuffleMask(SVOp, 1, false) || 5191 PPC::isVMRGLShuffleMask(SVOp, 2, false) || 5192 PPC::isVMRGLShuffleMask(SVOp, 4, false) || 5193 PPC::isVMRGHShuffleMask(SVOp, 1, false) || 5194 PPC::isVMRGHShuffleMask(SVOp, 2, false) || 5195 PPC::isVMRGHShuffleMask(SVOp, 4, false)) 5196 return Op; 5197 5198 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 5199 // perfect shuffle table to emit an optimal matching sequence. 5200 ArrayRef<int> PermMask = SVOp->getMask(); 5201 5202 unsigned PFIndexes[4]; 5203 bool isFourElementShuffle = true; 5204 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 5205 unsigned EltNo = 8; // Start out undef. 5206 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 5207 if (PermMask[i*4+j] < 0) 5208 continue; // Undef, ignore it. 5209 5210 unsigned ByteSource = PermMask[i*4+j]; 5211 if ((ByteSource & 3) != j) { 5212 isFourElementShuffle = false; 5213 break; 5214 } 5215 5216 if (EltNo == 8) { 5217 EltNo = ByteSource/4; 5218 } else if (EltNo != ByteSource/4) { 5219 isFourElementShuffle = false; 5220 break; 5221 } 5222 } 5223 PFIndexes[i] = EltNo; 5224 } 5225 5226 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 5227 // perfect shuffle vector to determine if it is cost effective to do this as 5228 // discrete instructions, or whether we should use a vperm. 5229 if (isFourElementShuffle) { 5230 // Compute the index in the perfect shuffle table. 5231 unsigned PFTableIndex = 5232 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5233 5234 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5235 unsigned Cost = (PFEntry >> 30); 5236 5237 // Determining when to avoid vperm is tricky. Many things affect the cost 5238 // of vperm, particularly how many times the perm mask needs to be computed. 5239 // For example, if the perm mask can be hoisted out of a loop or is already 5240 // used (perhaps because there are multiple permutes with the same shuffle 5241 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 5242 // the loop requires an extra register. 5243 // 5244 // As a compromise, we only emit discrete instructions if the shuffle can be 5245 // generated in 3 or fewer operations. When we have loop information 5246 // available, if this block is within a loop, we should avoid using vperm 5247 // for 3-operation perms and use a constant pool load instead. 
5248 if (Cost < 3) 5249 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5250 } 5251 5252 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 5253 // vector that will get spilled to the constant pool. 5254 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 5255 5256 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 5257 // that it is in input element units, not in bytes. Convert now. 5258 EVT EltVT = V1.getValueType().getVectorElementType(); 5259 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 5260 5261 SmallVector<SDValue, 16> ResultMask; 5262 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 5263 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 5264 5265 for (unsigned j = 0; j != BytesPerElement; ++j) 5266 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 5267 MVT::i32)); 5268 } 5269 5270 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 5271 &ResultMask[0], ResultMask.size()); 5272 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); 5273 } 5274 5275 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 5276 /// altivec comparison. If it is, return true and fill in Opc/isDot with 5277 /// information about the intrinsic. 5278 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 5279 bool &isDot) { 5280 unsigned IntrinsicID = 5281 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 5282 CompareOpc = -1; 5283 isDot = false; 5284 switch (IntrinsicID) { 5285 default: return false; 5286 // Comparison predicates. 5287 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 5288 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 5289 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 5290 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 5291 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 5292 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 5293 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 5294 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 5295 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 5296 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 5297 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 5298 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 5299 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 5300 5301 // Normal Comparisons. 
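// The non-dot forms only produce the vector result; they do not update CR6.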
5302 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 5303 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 5304 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 5305 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 5306 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 5307 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 5308 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 5309 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 5310 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 5311 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 5312 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 5313 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 5314 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 5315 } 5316 return true; 5317 } 5318 5319 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 5320 /// lower, do it, otherwise return null. 5321 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 5322 SelectionDAG &DAG) const { 5323 // If this is a lowered altivec predicate compare, CompareOpc is set to the 5324 // opcode number of the comparison. 5325 DebugLoc dl = Op.getDebugLoc(); 5326 int CompareOpc; 5327 bool isDot; 5328 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 5329 return SDValue(); // Don't custom lower most intrinsics. 5330 5331 // If this is a non-dot comparison, make the VCMP node and we are done. 5332 if (!isDot) { 5333 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 5334 Op.getOperand(1), Op.getOperand(2), 5335 DAG.getConstant(CompareOpc, MVT::i32)); 5336 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 5337 } 5338 5339 // Create the PPCISD altivec 'dot' comparison node. 5340 SDValue Ops[] = { 5341 Op.getOperand(2), // LHS 5342 Op.getOperand(3), // RHS 5343 DAG.getConstant(CompareOpc, MVT::i32) 5344 }; 5345 std::vector<EVT> VTs; 5346 VTs.push_back(Op.getOperand(2).getValueType()); 5347 VTs.push_back(MVT::Glue); 5348 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 5349 5350 // Now that we have the comparison, emit a copy from the CR to a GPR. 5351 // This is flagged to the above dot comparison. 5352 SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32, 5353 DAG.getRegister(PPC::CR6, MVT::i32), 5354 CompNode.getValue(1)); 5355 5356 // Unpack the result based on how the target uses it. 5357 unsigned BitNo; // Bit # of CR6. 5358 bool InvertBit; // Invert result? 5359 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 5360 default: // Can't happen, don't crash on invalid number though. 5361 case 0: // Return the value of the EQ bit of CR6. 5362 BitNo = 0; InvertBit = false; 5363 break; 5364 case 1: // Return the inverted value of the EQ bit of CR6. 5365 BitNo = 0; InvertBit = true; 5366 break; 5367 case 2: // Return the value of the LT bit of CR6. 5368 BitNo = 2; InvertBit = false; 5369 break; 5370 case 3: // Return the inverted value of the LT bit of CR6. 5371 BitNo = 2; InvertBit = true; 5372 break; 5373 } 5374 5375 // Shift the bit into the low position. 5376 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 5377 DAG.getConstant(8-(3-BitNo), MVT::i32)); 5378 // Isolate the bit. 
5379 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 5380 DAG.getConstant(1, MVT::i32)); 5381 5382 // If we are supposed to, toggle the bit. 5383 if (InvertBit) 5384 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 5385 DAG.getConstant(1, MVT::i32)); 5386 return Flags; 5387 } 5388 5389 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 5390 SelectionDAG &DAG) const { 5391 DebugLoc dl = Op.getDebugLoc(); 5392 // Create a stack slot that is 16-byte aligned. 5393 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 5394 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 5395 EVT PtrVT = getPointerTy(); 5396 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5397 5398 // Store the input value into Value#0 of the stack slot. 5399 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 5400 Op.getOperand(0), FIdx, MachinePointerInfo(), 5401 false, false, 0); 5402 // Load it out. 5403 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 5404 false, false, false, 0); 5405 } 5406 5407 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 5408 DebugLoc dl = Op.getDebugLoc(); 5409 if (Op.getValueType() == MVT::v4i32) { 5410 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5411 5412 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 5413 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 5414 5415 SDValue RHSSwap = // = vrlw RHS, 16 5416 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 5417 5418 // Shrinkify inputs to v8i16. 5419 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 5420 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 5421 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 5422 5423 // Low parts multiplied together, generating 32-bit results (we ignore the 5424 // top parts). 5425 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 5426 LHS, RHS, DAG, dl, MVT::v4i32); 5427 5428 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 5429 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 5430 // Shift the high parts up 16 bits. 5431 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 5432 Neg16, DAG, dl); 5433 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 5434 } else if (Op.getValueType() == MVT::v8i16) { 5435 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5436 5437 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 5438 5439 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 5440 LHS, RHS, Zero, DAG, dl); 5441 } else if (Op.getValueType() == MVT::v16i8) { 5442 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5443 5444 // Multiply the even 8-bit parts, producing 16-bit sums. 5445 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 5446 LHS, RHS, DAG, dl, MVT::v8i16); 5447 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 5448 5449 // Multiply the odd 8-bit parts, producing 16-bit sums. 5450 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 5451 LHS, RHS, DAG, dl, MVT::v8i16); 5452 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 5453 5454 // Merge the results together. 
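// Each 16-bit partial product keeps its meaningful low byte at the odd
// (big-endian) byte position, so interleaving those bytes from the even and
// odd products reassembles the 16 result bytes in order.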
5455 int Ops[16]; 5456 for (unsigned i = 0; i != 8; ++i) { 5457 Ops[i*2 ] = 2*i+1; 5458 Ops[i*2+1] = 2*i+1+16; 5459 } 5460 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 5461 } else { 5462 llvm_unreachable("Unknown mul to lower!"); 5463 } 5464 } 5465 5466 /// LowerOperation - Provide custom lowering hooks for some operations. 5467 /// 5468 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 5469 switch (Op.getOpcode()) { 5470 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 5471 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5472 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 5473 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5474 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5475 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5476 case ISD::SETCC: return LowerSETCC(Op, DAG); 5477 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 5478 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 5479 case ISD::VASTART: 5480 return LowerVASTART(Op, DAG, PPCSubTarget); 5481 5482 case ISD::VAARG: 5483 return LowerVAARG(Op, DAG, PPCSubTarget); 5484 5485 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); 5486 case ISD::DYNAMIC_STACKALLOC: 5487 return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); 5488 5489 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5490 case ISD::FP_TO_UINT: 5491 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 5492 Op.getDebugLoc()); 5493 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 5494 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5495 5496 // Lower 64-bit shifts. 5497 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 5498 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 5499 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 5500 5501 // Vector-related lowering. 5502 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 5503 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5504 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5505 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 5506 case ISD::MUL: return LowerMUL(Op, DAG); 5507 5508 // Frame & Return address. 
5509 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5510 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5511 } 5512 } 5513 5514 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 5515 SmallVectorImpl<SDValue>&Results, 5516 SelectionDAG &DAG) const { 5517 const TargetMachine &TM = getTargetMachine(); 5518 DebugLoc dl = N->getDebugLoc(); 5519 switch (N->getOpcode()) { 5520 default: 5521 llvm_unreachable("Do not know how to custom type legalize this operation!"); 5522 case ISD::VAARG: { 5523 if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() 5524 || TM.getSubtarget<PPCSubtarget>().isPPC64()) 5525 return; 5526 5527 EVT VT = N->getValueType(0); 5528 5529 if (VT == MVT::i64) { 5530 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); 5531 5532 Results.push_back(NewNode); 5533 Results.push_back(NewNode.getValue(1)); 5534 } 5535 return; 5536 } 5537 case ISD::FP_ROUND_INREG: { 5538 assert(N->getValueType(0) == MVT::ppcf128); 5539 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 5540 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 5541 MVT::f64, N->getOperand(0), 5542 DAG.getIntPtrConstant(0)); 5543 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 5544 MVT::f64, N->getOperand(0), 5545 DAG.getIntPtrConstant(1)); 5546 5547 // This sequence changes FPSCR to do round-to-zero, adds the two halves 5548 // of the long double, and puts FPSCR back the way it was. We do not 5549 // actually model FPSCR. 5550 std::vector<EVT> NodeTys; 5551 SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; 5552 5553 NodeTys.push_back(MVT::f64); // Return register 5554 NodeTys.push_back(MVT::Glue); // Returns a flag for later insns 5555 Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); 5556 MFFSreg = Result.getValue(0); 5557 InFlag = Result.getValue(1); 5558 5559 NodeTys.clear(); 5560 NodeTys.push_back(MVT::Glue); // Returns a flag 5561 Ops[0] = DAG.getConstant(31, MVT::i32); 5562 Ops[1] = InFlag; 5563 Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2); 5564 InFlag = Result.getValue(0); 5565 5566 NodeTys.clear(); 5567 NodeTys.push_back(MVT::Glue); // Returns a flag 5568 Ops[0] = DAG.getConstant(30, MVT::i32); 5569 Ops[1] = InFlag; 5570 Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2); 5571 InFlag = Result.getValue(0); 5572 5573 NodeTys.clear(); 5574 NodeTys.push_back(MVT::f64); // result of add 5575 NodeTys.push_back(MVT::Glue); // Returns a flag 5576 Ops[0] = Lo; 5577 Ops[1] = Hi; 5578 Ops[2] = InFlag; 5579 Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3); 5580 FPreg = Result.getValue(0); 5581 InFlag = Result.getValue(1); 5582 5583 NodeTys.clear(); 5584 NodeTys.push_back(MVT::f64); 5585 Ops[0] = DAG.getConstant(1, MVT::i32); 5586 Ops[1] = MFFSreg; 5587 Ops[2] = FPreg; 5588 Ops[3] = InFlag; 5589 Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4); 5590 FPreg = Result.getValue(0); 5591 5592 // We know the low half is about to be thrown away, so just use something 5593 // convenient. 
5594 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 5595 FPreg, FPreg)); 5596 return; 5597 } 5598 case ISD::FP_TO_SINT: 5599 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 5600 return; 5601 } 5602 } 5603 5604 5605 //===----------------------------------------------------------------------===// 5606 // Other Lowering Code 5607 //===----------------------------------------------------------------------===// 5608 5609 MachineBasicBlock * 5610 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 5611 bool is64bit, unsigned BinOpcode) const { 5612 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 5613 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5614 5615 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5616 MachineFunction *F = BB->getParent(); 5617 MachineFunction::iterator It = BB; 5618 ++It; 5619 5620 unsigned dest = MI->getOperand(0).getReg(); 5621 unsigned ptrA = MI->getOperand(1).getReg(); 5622 unsigned ptrB = MI->getOperand(2).getReg(); 5623 unsigned incr = MI->getOperand(3).getReg(); 5624 DebugLoc dl = MI->getDebugLoc(); 5625 5626 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 5627 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 5628 F->insert(It, loopMBB); 5629 F->insert(It, exitMBB); 5630 exitMBB->splice(exitMBB->begin(), BB, 5631 llvm::next(MachineBasicBlock::iterator(MI)), 5632 BB->end()); 5633 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5634 5635 MachineRegisterInfo &RegInfo = F->getRegInfo(); 5636 unsigned TmpReg = (!BinOpcode) ? incr : 5637 RegInfo.createVirtualRegister( 5638 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 5639 (const TargetRegisterClass *) &PPC::GPRCRegClass); 5640 5641 // thisMBB: 5642 // ... 5643 // fallthrough --> loopMBB 5644 BB->addSuccessor(loopMBB); 5645 5646 // loopMBB: 5647 // l[wd]arx dest, ptr 5648 // add r0, dest, incr 5649 // st[wd]cx. r0, ptr 5650 // bne- loopMBB 5651 // fallthrough --> exitMBB 5652 BB = loopMBB; 5653 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 5654 .addReg(ptrA).addReg(ptrB); 5655 if (BinOpcode) 5656 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 5657 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 5658 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 5659 BuildMI(BB, dl, TII->get(PPC::BCC)) 5660 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 5661 BB->addSuccessor(loopMBB); 5662 BB->addSuccessor(exitMBB); 5663 5664 // exitMBB: 5665 // ... 5666 BB = exitMBB; 5667 return BB; 5668 } 5669 5670 MachineBasicBlock * 5671 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 5672 MachineBasicBlock *BB, 5673 bool is8bit, // operation 5674 unsigned BinOpcode) const { 5675 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 5676 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5677 // In 64 bit mode we have to use 64 bits for addresses, even though the 5678 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 5679 // registers without caring whether they're 32 or 64, but here we're 5680 // doing actual arithmetic on the addresses. 5681 bool is64bit = PPCSubTarget.isPPC64(); 5682 unsigned ZeroReg = is64bit ? 
PPC::X0 : PPC::R0; 5683 5684 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5685 MachineFunction *F = BB->getParent(); 5686 MachineFunction::iterator It = BB; 5687 ++It; 5688 5689 unsigned dest = MI->getOperand(0).getReg(); 5690 unsigned ptrA = MI->getOperand(1).getReg(); 5691 unsigned ptrB = MI->getOperand(2).getReg(); 5692 unsigned incr = MI->getOperand(3).getReg(); 5693 DebugLoc dl = MI->getDebugLoc(); 5694 5695 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 5696 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 5697 F->insert(It, loopMBB); 5698 F->insert(It, exitMBB); 5699 exitMBB->splice(exitMBB->begin(), BB, 5700 llvm::next(MachineBasicBlock::iterator(MI)), 5701 BB->end()); 5702 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5703 5704 MachineRegisterInfo &RegInfo = F->getRegInfo(); 5705 const TargetRegisterClass *RC = 5706 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 5707 (const TargetRegisterClass *) &PPC::GPRCRegClass; 5708 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 5709 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 5710 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 5711 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 5712 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 5713 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 5714 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 5715 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 5716 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 5717 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 5718 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 5719 unsigned Ptr1Reg; 5720 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 5721 5722 // thisMBB: 5723 // ... 5724 // fallthrough --> loopMBB 5725 BB->addSuccessor(loopMBB); 5726 5727 // The 4-byte load must be aligned, while a char or short may be 5728 // anywhere in the word. Hence all this nasty bookkeeping code. 5729 // add ptr1, ptrA, ptrB [copy if ptrA==0] 5730 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 5731 // xori shift, shift1, 24 [16] 5732 // rlwinm ptr, ptr1, 0, 0, 29 5733 // slw incr2, incr, shift 5734 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 5735 // slw mask, mask2, shift 5736 // loopMBB: 5737 // lwarx tmpDest, ptr 5738 // add tmp, tmpDest, incr2 5739 // andc tmp2, tmpDest, mask 5740 // and tmp3, tmp, mask 5741 // or tmp4, tmp3, tmp2 5742 // stwcx. tmp4, ptr 5743 // bne- loopMBB 5744 // fallthrough --> exitMBB 5745 // srw dest, tmpDest, shift 5746 if (ptrA != ZeroReg) { 5747 Ptr1Reg = RegInfo.createVirtualRegister(RC); 5748 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 5749 .addReg(ptrA).addReg(ptrB); 5750 } else { 5751 Ptr1Reg = ptrB; 5752 } 5753 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 5754 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 5755 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 5756 .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); 5757 if (is64bit) 5758 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 5759 .addReg(Ptr1Reg).addImm(0).addImm(61); 5760 else 5761 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 5762 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 5763 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 5764 .addReg(incr).addReg(ShiftReg); 5765 if (is8bit) 5766 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 5767 else { 5768 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 5769 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 5770 } 5771 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 5772 .addReg(Mask2Reg).addReg(ShiftReg); 5773 5774 BB = loopMBB; 5775 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 5776 .addReg(ZeroReg).addReg(PtrReg); 5777 if (BinOpcode) 5778 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 5779 .addReg(Incr2Reg).addReg(TmpDestReg); 5780 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 5781 .addReg(TmpDestReg).addReg(MaskReg); 5782 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 5783 .addReg(TmpReg).addReg(MaskReg); 5784 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 5785 .addReg(Tmp3Reg).addReg(Tmp2Reg); 5786 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 5787 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 5788 BuildMI(BB, dl, TII->get(PPC::BCC)) 5789 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 5790 BB->addSuccessor(loopMBB); 5791 BB->addSuccessor(exitMBB); 5792 5793 // exitMBB: 5794 // ... 5795 BB = exitMBB; 5796 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 5797 .addReg(ShiftReg); 5798 return BB; 5799 } 5800 5801 MachineBasicBlock * 5802 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 5803 MachineBasicBlock *BB) const { 5804 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5805 5806 // To "insert" these instructions we actually have to insert their 5807 // control-flow patterns. 5808 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5809 MachineFunction::iterator It = BB; 5810 ++It; 5811 5812 MachineFunction *F = BB->getParent(); 5813 5814 if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || 5815 MI->getOpcode() == PPC::SELECT_CC_I8)) { 5816 unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ? 5817 PPC::ISEL8 : PPC::ISEL; 5818 unsigned SelectPred = MI->getOperand(4).getImm(); 5819 DebugLoc dl = MI->getDebugLoc(); 5820 5821 // The SelectPred is ((BI << 5) | BO) for a BCC 5822 unsigned BO = SelectPred & 0xF; 5823 assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel"); 5824 5825 unsigned TrueOpNo, FalseOpNo; 5826 if (BO == 12) { 5827 TrueOpNo = 2; 5828 FalseOpNo = 3; 5829 } else { 5830 TrueOpNo = 3; 5831 FalseOpNo = 2; 5832 SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred); 5833 } 5834 5835 BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) 5836 .addReg(MI->getOperand(TrueOpNo).getReg()) 5837 .addReg(MI->getOperand(FalseOpNo).getReg()) 5838 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()); 5839 } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || 5840 MI->getOpcode() == PPC::SELECT_CC_I8 || 5841 MI->getOpcode() == PPC::SELECT_CC_F4 || 5842 MI->getOpcode() == PPC::SELECT_CC_F8 || 5843 MI->getOpcode() == PPC::SELECT_CC_VRRC) { 5844 5845 5846 // The incoming instruction knows the destination vreg to set, the 5847 // condition code register to branch on, the true/false values to 5848 // select between, and a branch opcode to use. 
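// Lower the pseudo into the usual diamond: a conditional branch around a
// block supplying the false value, with a PHI in the join block selecting
// between the two incoming values.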
    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    unsigned SelectPred = MI->getOperand(4).getImm();
    DebugLoc dl = MI->getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl,
            TII->get(PPC::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
  }
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);

  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, true, 0);

  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    unsigned dest = MI->getOperand(0).getReg();
    unsigned ptrA = MI->getOperand(1).getReg();
    unsigned ptrB = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[wd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[wd]cx. newval, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   st[wd]cx. dest, ptr
    // exitBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
      .addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
      .addReg(oldval).addReg(dest);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = PPCSubTarget.isPPC64();
    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    unsigned dest = MI->getOperand(0).getReg();
    unsigned ptrA = MI->getOperand(1).getReg();
    unsigned ptrB = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC =
      is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
                (const TargetRegisterClass *) &PPC::GPRCRegClass;
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
    unsigned Ptr1Reg;
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
    unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitBB:
    //   srw dest, tmpDest, shift
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }
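    // This repeats the shift-and-mask bookkeeping from
    // EmitPartwordAtomicBinary: both oldval and newval are shifted into the
    // byte/halfword lane and masked, so the word-sized compare below only
    // tests the lane of interest.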
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
          .addReg(Ptr1Reg).addImm(0).addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
          .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval).addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg).addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg).addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
        .addReg(TmpReg).addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
        .addReg(Tmp2Reg).addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
        .addReg(ShiftReg);
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  const TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  DebugLoc dl = N->getDebugLoc();
  switch (N->getOpcode()) {
  default: break;
  case PPCISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())  // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;

  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64 &&
            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
          SDValue Val = N->getOperand(0).getOperand(0);
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
            DCI.AddToWorklist(Val.getNode());
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
                              DAG.getIntPtrConstant(0));
            DCI.AddToWorklist(Val.getNode());
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16)) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, array_lengthof(Ops),
                                cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
                                LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away; we give it a bogus result value but a
      // real chain result.  The result value is dead because the bswap is
      // dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6
    // and a normal output).
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple
      // things could use the value result, which we should ignore.
      SDNode *FlagUser = 0;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == 0; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFCR instruction, we know this is safe.  Otherwise
      // we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFCR)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the
    // predicate compare down to code that is difficult to reassemble.
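    // For example (illustrative IR), a branch whose condition is
    //   %r = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> %a,
    //                                              <4 x i32> %b)
    // compared with seteq/setne against 0 or 1 reaches this point as a BR_CC
    // and is rewritten into a COND_BRANCH on the relevant bit of CR6.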
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      std::vector<EVT> VTs;
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      VTs.push_back(LHS.getOperand(2).getValueType());
      VTs.push_back(MVT::Glue);
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}


/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
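  // The letters below mirror the GCC rs6000 constraints handled in
  // getRegForInlineAsmConstraint: 'b' and 'r' select GPRs, 'f' and 'd' FPRs,
  // 'v' Altivec registers, 'y' a condition register, and 'Z' a memory operand.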
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    case 'f':
      if (VT == MVT::f32 || VT == MVT::i32)
        return std::make_pair(0U, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return std::make_pair(0U, &PPC::F8RCRegClass);
      break;
    case 'v':
      return std::make_pair(0U, &PPC::VRRCRegClass);
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}


/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0,0);

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    unsigned Value = CST->getZExtValue();
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if ((short)Value == (int)Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if ((short)Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if ((Value >> 16) == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if ((int)Value > 0 && isPowerOf2_32(Value))
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if ((short)-Value == (int)-Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              Type *Ty) const {
  // FIXME: PPC does not allow r+i addressing modes for vectors!

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing beyond this point.
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V, Type *Ty) const {
  // PPC allows a sign-extended 16-bit immediate field.
  return (V > -(1 << 16) && V < (1 << 16)-1);
}

bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
  return false;
}

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  DebugLoc dl = Op.getDebugLoc();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = PPCSubTarget.isPPC64();
  bool isDarwinABI = PPCSubTarget.isDarwinABI();

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
      DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64,
                                                            isDarwinABI),
                      isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                   FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Just load the return address off the stack.
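  // Depth 0: load the saved return address from its dedicated save slot;
  // getReturnAddrFrameIndex creates the frame index for that slot on demand.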
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);
  bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
               MFI->hasVarSizedObjects()) &&
              MFI->getStackSize() &&
              !MF.getFunction()->getFnAttributes().
                hasAttribute(Attributes::Naked);
  unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
                                (is31 ? PPC::R31 : PPC::R1);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove lowering.
/// If DstAlign is zero, the destination alignment can satisfy any constraint.
/// Similarly, if SrcAlign is zero there is no need to check it against an
/// alignment requirement, probably because the source does not need to be
/// loaded. If 'IsZeroVal' is true, it is safe to return a
/// non-scalar-integer type, e.g. an empty string source, a constant, or a
/// value loaded from memory. 'MemcpyStrSrc' indicates whether the memcpy
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsZeroVal,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  if (this->PPCSubTarget.isPPC64()) {
    return MVT::i64;
  } else {
    return MVT::i32;
  }
}

/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal); otherwise fmuladd
/// is expanded to mul + add.
bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
  case MVT::v4f32:
    return true;
  default:
    break;
  }

  return false;
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref)
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}