1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PPCISelLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPCISelLowering.h"
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPCCallingConv.h"
17 #include "PPCCCState.h"
18 #include "PPCMachineFunctionInfo.h"
19 #include "PPCPerfectShuffle.h"
20 #include "PPCTargetMachine.h"
21 #include "PPCTargetObjectFile.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/ADT/Triple.h"
26 #include "llvm/CodeGen/CallingConvLower.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineFunction.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineLoopInfo.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/SelectionDAG.h"
33 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
34 #include "llvm/IR/CallingConv.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/IR/Intrinsics.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/MathExtras.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetOptions.h"
45 #include <list>
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "ppc-lowering"
50 
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"),
    cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on PPC"), cl::Hidden);
62 
63 STATISTIC(NumTailCalls, "Number of tail calls");
64 STATISTIC(NumSiblingCalls, "Number of sibling calls");
65 
66 // FIXME: Remove this once the bug has been fixed!
67 extern cl::opt<bool> ANDIGlueBug;
68 
69 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
70                                      const PPCSubtarget &STI)
71     : TargetLowering(TM), Subtarget(STI) {
72   // Use _setjmp/_longjmp instead of setjmp/longjmp.
73   setUseUnderscoreSetJmp(true);
74   setUseUnderscoreLongJmp(true);
75 
76   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
77   // arguments are at least 4/8 bytes aligned.
78   bool isPPC64 = Subtarget.isPPC64();
79   setMinStackArgumentAlignment(isPPC64 ? 8:4);
80 
81   // Set up the register classes.
82   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
83   if (!useSoftFloat()) {
84     addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
85     addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
86   }
87 
88   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
89   for (MVT VT : MVT::integer_valuetypes()) {
90     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
91     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
92   }
93 
94   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
95 
  // PowerPC has pre-inc loads and stores.
97   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
98   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
99   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
100   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
101   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
102   setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
103   setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
104   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
105   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
106   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
107   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
108   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
109   setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
110   setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
111 
112   if (Subtarget.useCRBits()) {
113     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
114 
115     if (isPPC64 || Subtarget.hasFPCVT()) {
116       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
117       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
118                          isPPC64 ? MVT::i64 : MVT::i32);
119       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
120       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
121                         isPPC64 ? MVT::i64 : MVT::i32);
122     } else {
123       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
124       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
125     }
126 
127     // PowerPC does not support direct load / store of condition registers
128     setOperationAction(ISD::LOAD, MVT::i1, Custom);
129     setOperationAction(ISD::STORE, MVT::i1, Custom);
130 
131     // FIXME: Remove this once the ANDI glue bug is fixed:
132     if (ANDIGlueBug)
133       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
134 
135     for (MVT VT : MVT::integer_valuetypes()) {
136       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
137       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
138       setTruncStoreAction(VT, MVT::i1, Expand);
139     }
140 
141     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
142   }
143 
144   // This is used in the ppcf128->int sequence.  Note it has different semantics
145   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
146   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
147 
148   // We do not currently implement these libm ops for PowerPC.
149   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
150   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
151   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
152   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
153   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
154   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
155 
156   // PowerPC has no SREM/UREM instructions
157   setOperationAction(ISD::SREM, MVT::i32, Expand);
158   setOperationAction(ISD::UREM, MVT::i32, Expand);
159   setOperationAction(ISD::SREM, MVT::i64, Expand);
160   setOperationAction(ISD::UREM, MVT::i64, Expand);
161 
162   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
163   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
164   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
165   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
166   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
167   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
168   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
169   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
170   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
171 
172   // We don't support sin/cos/sqrt/fmod/pow
173   setOperationAction(ISD::FSIN , MVT::f64, Expand);
174   setOperationAction(ISD::FCOS , MVT::f64, Expand);
175   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
176   setOperationAction(ISD::FREM , MVT::f64, Expand);
177   setOperationAction(ISD::FPOW , MVT::f64, Expand);
178   setOperationAction(ISD::FMA  , MVT::f64, Legal);
179   setOperationAction(ISD::FSIN , MVT::f32, Expand);
180   setOperationAction(ISD::FCOS , MVT::f32, Expand);
181   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
182   setOperationAction(ISD::FREM , MVT::f32, Expand);
183   setOperationAction(ISD::FPOW , MVT::f32, Expand);
184   setOperationAction(ISD::FMA  , MVT::f32, Legal);
185 
186   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
187 
  // Expand FSQRT unless the subtarget has a hardware square root, or unsafe
  // FP math lets us use the reciprocal square-root estimate instructions.
189   if (!Subtarget.hasFSQRT() &&
190       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
191         Subtarget.hasFRE()))
192     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
193 
194   if (!Subtarget.hasFSQRT() &&
195       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
196         Subtarget.hasFRES()))
197     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
198 
199   if (Subtarget.hasFCPSGN()) {
200     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
201     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
202   } else {
203     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
204     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
205   }
206 
207   if (Subtarget.hasFPRND()) {
208     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
209     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
210     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
211     setOperationAction(ISD::FROUND, MVT::f64, Legal);
212 
213     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
214     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
215     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
216     setOperationAction(ISD::FROUND, MVT::f32, Legal);
217   }
218 
219   // PowerPC does not have BSWAP
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
221   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
222   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
223   if (Subtarget.isISA3_0()) {
224     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
225     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
226   } else {
227     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
228     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
229   }
230 
231   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
232     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
233     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
234   } else {
235     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
236     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
237   }
238 
239   // PowerPC does not have ROTR
240   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
241   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
242 
243   if (!Subtarget.useCRBits()) {
244     // PowerPC does not have Select
245     setOperationAction(ISD::SELECT, MVT::i32, Expand);
246     setOperationAction(ISD::SELECT, MVT::i64, Expand);
247     setOperationAction(ISD::SELECT, MVT::f32, Expand);
248     setOperationAction(ISD::SELECT, MVT::f64, Expand);
249   }
250 
251   // PowerPC wants to turn select_cc of FP into fsel when possible.
252   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
253   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
254 
255   // PowerPC wants to optimize integer setcc a bit
256   if (!Subtarget.useCRBits())
257     setOperationAction(ISD::SETCC, MVT::i32, Custom);
258 
259   // PowerPC does not have BRCOND which requires SetCC
260   if (!Subtarget.useCRBits())
261     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
262 
263   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
264 
265   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
266   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
267 
268   // PowerPC does not have [U|S]INT_TO_FP
269   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
270   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
271 
272   if (Subtarget.hasDirectMove() && isPPC64) {
273     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
274     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
275     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
276     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
277   } else {
278     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
279     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
280     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
281     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
282   }
283 
284   // We cannot sextinreg(i1).  Expand to shifts.
285   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
286 
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
  // handling; they are a lightweight setjmp/longjmp replacement used for
  // continuations, user-level threading, and the like. As a result, no other
  // SjLj exception interfaces are implemented, so please don't build your own
  // exception handling on top of them.
292   // LLVM/Clang supports zero-cost DWARF exception handling.
293   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
294   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
295 
296   // We want to legalize GlobalAddress and ConstantPool nodes into the
297   // appropriate instructions to materialize the address.
298   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
299   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
300   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
301   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
302   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
303   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
304   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
305   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
306   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
307   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
308 
309   // TRAP is legal.
310   setOperationAction(ISD::TRAP, MVT::Other, Legal);
311 
312   // TRAMPOLINE is custom lowered.
313   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
314   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
315 
316   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
317   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
318 
319   if (Subtarget.isSVR4ABI()) {
320     if (isPPC64) {
321       // VAARG always uses double-word chunks, so promote anything smaller.
322       setOperationAction(ISD::VAARG, MVT::i1, Promote);
323       AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
324       setOperationAction(ISD::VAARG, MVT::i8, Promote);
325       AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
326       setOperationAction(ISD::VAARG, MVT::i16, Promote);
327       AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
328       setOperationAction(ISD::VAARG, MVT::i32, Promote);
329       AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
330       setOperationAction(ISD::VAARG, MVT::Other, Expand);
331     } else {
332       // VAARG is custom lowered with the 32-bit SVR4 ABI.
333       setOperationAction(ISD::VAARG, MVT::Other, Custom);
334       setOperationAction(ISD::VAARG, MVT::i64, Custom);
335     }
336   } else
337     setOperationAction(ISD::VAARG, MVT::Other, Expand);
338 
339   if (Subtarget.isSVR4ABI() && !isPPC64)
340     // VACOPY is custom lowered with the 32-bit SVR4 ABI.
341     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
342   else
343     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
344 
345   // Use the default implementation.
346   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
347   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
348   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
349   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
350   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
351   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
352   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
353   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
354   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
355 
356   // We want to custom lower some of our intrinsics.
357   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
358 
359   // To handle counter-based loop conditions.
360   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
361 
362   // Comparisons that require checking two conditions.
363   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
364   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
365   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
366   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
367   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
368   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
369   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
370   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
371   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
372   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
373   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
374   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
375 
376   if (Subtarget.has64BitSupport()) {
377     // They also have instructions for converting between i64 and fp.
378     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
379     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
380     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
381     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
382     // This is just the low 32 bits of a (signed) fp->i64 conversion.
383     // We cannot do this with Promote because i64 is not a legal type.
384     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
385 
386     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
387       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
388   } else {
389     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
390     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
391   }
392 
  // With the instructions enabled under FPCVT, we can handle all of the
  // integer <-> floating-point conversions.
394   if (Subtarget.hasFPCVT()) {
395     if (Subtarget.has64BitSupport()) {
396       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
397       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
398       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
399       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
400     }
401 
402     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
403     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
404     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
405     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
406   }
407 
408   if (Subtarget.use64BitRegs()) {
409     // 64-bit PowerPC implementations can support i64 types directly
410     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
411     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
412     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
413     // 64-bit PowerPC wants to expand i128 shifts itself.
414     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
415     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
416     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
417   } else {
418     // 32-bit PowerPC wants to expand i64 shifts itself.
419     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
420     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
421     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
422   }
423 
424   if (Subtarget.hasAltivec()) {
425     // First set operation action for all vector types to expand. Then we
426     // will selectively turn on ones that can be effectively codegen'd.
427     for (MVT VT : MVT::vector_valuetypes()) {
428       // add/sub are legal for all supported vector VT's.
429       setOperationAction(ISD::ADD, VT, Legal);
430       setOperationAction(ISD::SUB, VT, Legal);
431 
432       // Vector instructions introduced in P8
433       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
434         setOperationAction(ISD::CTPOP, VT, Legal);
435         setOperationAction(ISD::CTLZ, VT, Legal);
436       }
437       else {
438         setOperationAction(ISD::CTPOP, VT, Expand);
439         setOperationAction(ISD::CTLZ, VT, Expand);
440       }
441 
442       // Vector instructions introduced in P9
443       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
444         setOperationAction(ISD::CTTZ, VT, Legal);
445       else
446         setOperationAction(ISD::CTTZ, VT, Expand);
447 
448       // We promote all shuffles to v16i8.
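      // Altivec's vperm is a byte-level permute, so any vector shuffle can be
      // expressed on v16i8.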
449       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
450       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
451 
452       // We promote all non-typed operations to v4i32.
453       setOperationAction(ISD::AND   , VT, Promote);
454       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
455       setOperationAction(ISD::OR    , VT, Promote);
456       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
457       setOperationAction(ISD::XOR   , VT, Promote);
458       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
459       setOperationAction(ISD::LOAD  , VT, Promote);
460       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
461       setOperationAction(ISD::SELECT, VT, Promote);
462       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
463       setOperationAction(ISD::SELECT_CC, VT, Promote);
464       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
465       setOperationAction(ISD::STORE, VT, Promote);
466       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
467 
468       // No other operations are legal.
469       setOperationAction(ISD::MUL , VT, Expand);
470       setOperationAction(ISD::SDIV, VT, Expand);
471       setOperationAction(ISD::SREM, VT, Expand);
472       setOperationAction(ISD::UDIV, VT, Expand);
473       setOperationAction(ISD::UREM, VT, Expand);
474       setOperationAction(ISD::FDIV, VT, Expand);
475       setOperationAction(ISD::FREM, VT, Expand);
476       setOperationAction(ISD::FNEG, VT, Expand);
477       setOperationAction(ISD::FSQRT, VT, Expand);
478       setOperationAction(ISD::FLOG, VT, Expand);
479       setOperationAction(ISD::FLOG10, VT, Expand);
480       setOperationAction(ISD::FLOG2, VT, Expand);
481       setOperationAction(ISD::FEXP, VT, Expand);
482       setOperationAction(ISD::FEXP2, VT, Expand);
483       setOperationAction(ISD::FSIN, VT, Expand);
484       setOperationAction(ISD::FCOS, VT, Expand);
485       setOperationAction(ISD::FABS, VT, Expand);
486       setOperationAction(ISD::FPOWI, VT, Expand);
487       setOperationAction(ISD::FFLOOR, VT, Expand);
488       setOperationAction(ISD::FCEIL,  VT, Expand);
489       setOperationAction(ISD::FTRUNC, VT, Expand);
490       setOperationAction(ISD::FRINT,  VT, Expand);
491       setOperationAction(ISD::FNEARBYINT, VT, Expand);
492       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
493       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
494       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
495       setOperationAction(ISD::MULHU, VT, Expand);
496       setOperationAction(ISD::MULHS, VT, Expand);
497       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
498       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
499       setOperationAction(ISD::UDIVREM, VT, Expand);
500       setOperationAction(ISD::SDIVREM, VT, Expand);
501       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
502       setOperationAction(ISD::FPOW, VT, Expand);
503       setOperationAction(ISD::BSWAP, VT, Expand);
504       setOperationAction(ISD::VSELECT, VT, Expand);
505       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
506       setOperationAction(ISD::ROTL, VT, Expand);
507       setOperationAction(ISD::ROTR, VT, Expand);
508 
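      // No extending loads or truncating stores are supported between vector
      // types; mark every combination Expand.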
509       for (MVT InnerVT : MVT::vector_valuetypes()) {
510         setTruncStoreAction(VT, InnerVT, Expand);
511         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
512         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
513         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
514       }
515     }
516 
517     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
518     // with merges, splats, etc.
519     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
520 
521     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
522     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
523     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
524     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
525     setOperationAction(ISD::SELECT, MVT::v4i32,
526                        Subtarget.useCRBits() ? Legal : Expand);
527     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
528     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
529     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
530     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
531     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
532     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
533     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
534     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
535     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
536 
537     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
538     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
539     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
540     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
541 
542     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
543     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
544 
545     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
546       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
547       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
548     }
549 
550     if (Subtarget.hasP8Altivec())
551       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
552     else
553       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
554 
555     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
556     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
557 
558     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
559     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
560 
561     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
562     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
563     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
564     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
565     if (Subtarget.hasP8Altivec())
566       setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
567     if (Subtarget.hasVSX())
568       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
569 
570     // Altivec does not contain unordered floating-point compare instructions
571     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
572     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
573     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
574     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
575 
576     if (Subtarget.hasVSX()) {
577       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
578       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
579       if (Subtarget.hasP8Vector()) {
580         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
581         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
582       }
583       if (Subtarget.hasDirectMove() && isPPC64) {
584         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
585         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
586         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
587         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
588         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
589         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
590         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
591         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
592       }
593       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
594 
595       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
596       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
597       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
598       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
599       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
600 
601       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
602 
603       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
604       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
605 
606       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
607       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
608 
609       setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
610       setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
611       setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
612       setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
613       setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
614 
615       // Share the Altivec comparison restrictions.
616       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
617       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
618       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
619       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
620 
621       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
622       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
623 
624       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
625 
626       if (Subtarget.hasP8Vector())
627         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
628 
629       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
630 
631       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
632       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
633       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
634 
635       if (Subtarget.hasP8Altivec()) {
636         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
637         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
638         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
639 
640         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
641       }
642       else {
643         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
644         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
645         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
646 
647         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
648 
649         // VSX v2i64 only supports non-arithmetic operations.
650         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
651         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
652       }
653 
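      // v2i64 memory accesses use the same VSX loads and stores as v2f64, so
      // promote them to the f64 form.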
654       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
655       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
656       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
657       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
658 
659       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
660 
661       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
662       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
663       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
664       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
665 
666       // Vector operation legalization checks the result type of
667       // SIGN_EXTEND_INREG, overall legalization checks the inner type.
668       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
669       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
670       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
671       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
672 
673       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
674       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
675       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
676       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
677 
678       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
679     }
680 
681     if (Subtarget.hasP8Altivec()) {
682       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
683       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
684     }
685 
686     if (Subtarget.hasP9Vector()) {
687       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
688       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
689     }
690 
691     if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
692       setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
693   }
694 
695   if (Subtarget.hasQPX()) {
696     setOperationAction(ISD::FADD, MVT::v4f64, Legal);
697     setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
698     setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
699     setOperationAction(ISD::FREM, MVT::v4f64, Expand);
700 
701     setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
702     setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
703 
704     setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
705     setOperationAction(ISD::STORE , MVT::v4f64, Custom);
706 
707     setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
708     setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
709 
710     if (!Subtarget.useCRBits())
711       setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
712     setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
713 
714     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
715     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
716     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
717     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
718     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
719     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
720     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
721 
722     setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
723     setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
724 
725     setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
726     setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
727     setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
728 
729     setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
730     setOperationAction(ISD::FABS , MVT::v4f64, Legal);
731     setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
732     setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
733     setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
734     setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
735     setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
736     setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
737     setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
738     setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
739     setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
740 
741     setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
742     setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
743 
744     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
745     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
746 
747     addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
748 
749     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
750     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
751     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
752     setOperationAction(ISD::FREM, MVT::v4f32, Expand);
753 
754     setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
755     setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
756 
757     setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
758     setOperationAction(ISD::STORE , MVT::v4f32, Custom);
759 
760     if (!Subtarget.useCRBits())
761       setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
762     setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
763 
764     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
765     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
766     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
767     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
768     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
769     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
770     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
771 
772     setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
773     setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
774 
775     setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
776     setOperationAction(ISD::FABS , MVT::v4f32, Legal);
777     setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
778     setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
779     setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
780     setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
781     setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
782     setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
783     setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
784     setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
785     setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
786 
787     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
788     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
789 
790     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
791     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
792 
793     addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
794 
795     setOperationAction(ISD::AND , MVT::v4i1, Legal);
796     setOperationAction(ISD::OR , MVT::v4i1, Legal);
797     setOperationAction(ISD::XOR , MVT::v4i1, Legal);
798 
799     if (!Subtarget.useCRBits())
800       setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
801     setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
802 
803     setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
804     setOperationAction(ISD::STORE , MVT::v4i1, Custom);
805 
806     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
807     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
808     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
809     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
810     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
811     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
812     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
813 
814     setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
815     setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
816 
817     addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
818 
819     setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
820     setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
821     setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
822     setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
823 
824     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
825     setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
826     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
827     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
828 
829     setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
830     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
831 
832     // These need to set FE_INEXACT, and so cannot be vectorized here.
833     setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
834     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
835 
836     if (TM.Options.UnsafeFPMath) {
837       setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
838       setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
839 
840       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
841       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
842     } else {
843       setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
844       setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
845 
846       setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
847       setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
848     }
849   }
850 
851   if (Subtarget.has64BitSupport())
852     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
853 
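  // On 64-bit targets the time base can be read directly; 32-bit targets need
  // a custom sequence that reads the two halves of the time base consistently.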
854   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
855 
856   if (!isPPC64) {
857     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
858     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
859   }
860 
861   setBooleanContents(ZeroOrOneBooleanContent);
862 
863   if (Subtarget.hasAltivec()) {
864     // Altivec instructions set fields to all zeros or all ones.
865     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
866   }
867 
868   if (!isPPC64) {
    // These libcalls are not available in 32-bit mode.
870     setLibcallName(RTLIB::SHL_I128, nullptr);
871     setLibcallName(RTLIB::SRL_I128, nullptr);
872     setLibcallName(RTLIB::SRA_I128, nullptr);
873   }
874 
875   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
876 
877   // We have target-specific dag combine patterns for the following nodes:
878   setTargetDAGCombine(ISD::SINT_TO_FP);
879   setTargetDAGCombine(ISD::BUILD_VECTOR);
880   if (Subtarget.hasFPCVT())
881     setTargetDAGCombine(ISD::UINT_TO_FP);
882   setTargetDAGCombine(ISD::LOAD);
883   setTargetDAGCombine(ISD::STORE);
884   setTargetDAGCombine(ISD::BR_CC);
885   if (Subtarget.useCRBits())
886     setTargetDAGCombine(ISD::BRCOND);
887   setTargetDAGCombine(ISD::BSWAP);
888   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
889   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
890   setTargetDAGCombine(ISD::INTRINSIC_VOID);
891 
892   setTargetDAGCombine(ISD::SIGN_EXTEND);
893   setTargetDAGCombine(ISD::ZERO_EXTEND);
894   setTargetDAGCombine(ISD::ANY_EXTEND);
895 
896   if (Subtarget.useCRBits()) {
897     setTargetDAGCombine(ISD::TRUNCATE);
898     setTargetDAGCombine(ISD::SETCC);
899     setTargetDAGCombine(ISD::SELECT_CC);
900   }
901 
902   // Use reciprocal estimates.
903   if (TM.Options.UnsafeFPMath) {
904     setTargetDAGCombine(ISD::FDIV);
905     setTargetDAGCombine(ISD::FSQRT);
906   }
907 
908   // Darwin long double math library functions have $LDBL128 appended.
909   if (Subtarget.isDarwin()) {
910     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
911     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
912     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
913     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
914     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
915     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
916     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
917     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
918     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
919     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
920   }
921 
922   // With 32 condition bits, we don't need to sink (and duplicate) compares
923   // aggressively in CodeGenPrep.
924   if (Subtarget.useCRBits()) {
925     setHasMultipleConditionRegisters();
926     setJumpIsExpensive();
927   }
928 
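  // Note: these alignment values are the log2 of the byte alignment (2 means
  // 4-byte alignment, 4 means 16-byte alignment).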
929   setMinFunctionAlignment(2);
930   if (Subtarget.isDarwin())
931     setPrefFunctionAlignment(4);
932 
933   switch (Subtarget.getDarwinDirective()) {
934   default: break;
935   case PPC::DIR_970:
936   case PPC::DIR_A2:
937   case PPC::DIR_E500mc:
938   case PPC::DIR_E5500:
939   case PPC::DIR_PWR4:
940   case PPC::DIR_PWR5:
941   case PPC::DIR_PWR5X:
942   case PPC::DIR_PWR6:
943   case PPC::DIR_PWR6X:
944   case PPC::DIR_PWR7:
945   case PPC::DIR_PWR8:
946   case PPC::DIR_PWR9:
947     setPrefFunctionAlignment(4);
948     setPrefLoopAlignment(4);
949     break;
950   }
951 
952   if (Subtarget.enableMachineScheduler())
953     setSchedulingPreference(Sched::Source);
954   else
955     setSchedulingPreference(Sched::Hybrid);
956 
957   computeRegisterProperties(STI.getRegisterInfo());
958 
959   // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
961   if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
962       Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
963     MaxStoresPerMemset = 32;
964     MaxStoresPerMemsetOptSize = 16;
965     MaxStoresPerMemcpy = 32;
966     MaxStoresPerMemcpyOptSize = 8;
967     MaxStoresPerMemmove = 32;
968     MaxStoresPerMemmoveOptSize = 8;
969   } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
970     // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
972     // over one hundred cycles.
973     MaxStoresPerMemset = 128;
974     MaxStoresPerMemcpy = 128;
975     MaxStoresPerMemmove = 128;
976   }
977 }
978 
979 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
980 /// the desired ByVal argument alignment.
981 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
982                              unsigned MaxMaxAlign) {
983   if (MaxAlign == MaxMaxAlign)
984     return;
985   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
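    // 256-bit (QPX) vectors prefer 32-byte alignment; 128-bit (Altivec/VSX)
    // vectors prefer 16-byte alignment, subject to the MaxMaxAlign cap.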
986     if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
987       MaxAlign = 32;
988     else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
989       MaxAlign = 16;
990   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
991     unsigned EltAlign = 0;
992     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
993     if (EltAlign > MaxAlign)
994       MaxAlign = EltAlign;
995   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
996     for (auto *EltTy : STy->elements()) {
997       unsigned EltAlign = 0;
998       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
999       if (EltAlign > MaxAlign)
1000         MaxAlign = EltAlign;
1001       if (MaxAlign == MaxMaxAlign)
1002         break;
1003     }
1004   }
1005 }
1006 
1007 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1008 /// function arguments in the caller parameter area.
1009 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1010                                                   const DataLayout &DL) const {
  // Darwin passes everything on a 4-byte boundary.
1012   if (Subtarget.isDarwin())
1013     return 4;
1014 
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // Everything else is passed on an 8-byte boundary on PPC64 and a 4-byte
  // boundary on PPC32.
1017   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1018   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1019     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1020   return Align;
1021 }
1022 
1023 bool PPCTargetLowering::useSoftFloat() const {
1024   return Subtarget.useSoftFloat();
1025 }
1026 
1027 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1028   switch ((PPCISD::NodeType)Opcode) {
1029   case PPCISD::FIRST_NUMBER:    break;
1030   case PPCISD::FSEL:            return "PPCISD::FSEL";
1031   case PPCISD::FCFID:           return "PPCISD::FCFID";
1032   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1033   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1034   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1035   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1036   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1037   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1038   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1039   case PPCISD::FRE:             return "PPCISD::FRE";
1040   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1041   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1042   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
1043   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
1044   case PPCISD::VPERM:           return "PPCISD::VPERM";
1045   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1046   case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
1047   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1048   case PPCISD::CMPB:            return "PPCISD::CMPB";
1049   case PPCISD::Hi:              return "PPCISD::Hi";
1050   case PPCISD::Lo:              return "PPCISD::Lo";
1051   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1052   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1053   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1054   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1055   case PPCISD::SRL:             return "PPCISD::SRL";
1056   case PPCISD::SRA:             return "PPCISD::SRA";
1057   case PPCISD::SHL:             return "PPCISD::SHL";
1058   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1059   case PPCISD::CALL:            return "PPCISD::CALL";
1060   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1061   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1062   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1063   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1064   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1065   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1066   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1067   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1068   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1069   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1070   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1071   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1072   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1073   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1074   case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
1075   case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
1076   case PPCISD::VCMP:            return "PPCISD::VCMP";
1077   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
1078   case PPCISD::LBRX:            return "PPCISD::LBRX";
1079   case PPCISD::STBRX:           return "PPCISD::STBRX";
1080   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1081   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1082   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1083   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1084   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1085   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1086   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1087   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1088   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1089   case PPCISD::BDZ:             return "PPCISD::BDZ";
1090   case PPCISD::MFFS:            return "PPCISD::MFFS";
1091   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1092   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1093   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1094   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1095   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1096   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1097   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1098   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1099   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1100   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1101   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1102   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1103   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1104   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1105   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1106   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1107   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1108   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1109   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1110   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1111   case PPCISD::SC:              return "PPCISD::SC";
1112   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1113   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1114   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1115   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1116   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1117   case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
1118   case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
1119   case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
1120   case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
1121   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
1122   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
1123   }
1124   return nullptr;
1125 }
1126 
1127 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1128                                           EVT VT) const {
1129   if (!VT.isVector())
1130     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1131 
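  // QPX keeps vector comparison results in v4i1 predicate registers, so use
  // i1 elements for the SETCC result type.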
1132   if (Subtarget.hasQPX())
1133     return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1134 
1135   return VT.changeVectorElementTypeToInteger();
1136 }
1137 
1138 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1139   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1140   return true;
1141 }
1142 
1143 //===----------------------------------------------------------------------===//
1144 // Node matching predicates, for use by the tblgen matching code.
1145 //===----------------------------------------------------------------------===//
1146 
1147 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1148 static bool isFloatingPointZero(SDValue Op) {
1149   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1150     return CFP->getValueAPF().isZero();
1151   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1152     // Maybe this has already been legalized into the constant pool?
1153     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1154       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1155         return CFP->getValueAPF().isZero();
1156   }
1157   return false;
1158 }
1159 
/// isConstantOrUndef - Op is a shuffle-mask element: negative when undef,
/// otherwise a constant index. Return true if Op is undef or equals Val.
1162 static bool isConstantOrUndef(int Op, int Val) {
1163   return Op < 0 || Op == Val;
1164 }
1165 
1166 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1167 /// VPKUHUM instruction.
1168 /// The ShuffleKind distinguishes between big-endian operations with
1169 /// two different inputs (0), either-endian operations with two identical
1170 /// inputs (1), and little-endian operations with two different inputs (2).
1171 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1172 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1173                                SelectionDAG &DAG) {
1174   bool IsLE = DAG.getDataLayout().isLittleEndian();
1175   if (ShuffleKind == 0) {
1176     if (IsLE)
1177       return false;
1178     for (unsigned i = 0; i != 16; ++i)
1179       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1180         return false;
1181   } else if (ShuffleKind == 2) {
1182     if (!IsLE)
1183       return false;
1184     for (unsigned i = 0; i != 16; ++i)
1185       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1186         return false;
1187   } else if (ShuffleKind == 1) {
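    // With a single repeated input, each result byte is the low-order byte of
    // a halfword: offset 0 within the halfword on little-endian, 1 on
    // big-endian.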
1188     unsigned j = IsLE ? 0 : 1;
1189     for (unsigned i = 0; i != 8; ++i)
1190       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1191           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1192         return false;
1193   }
1194   return true;
1195 }
1196 
1197 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1198 /// VPKUWUM instruction.
1199 /// The ShuffleKind distinguishes between big-endian operations with
1200 /// two different inputs (0), either-endian operations with two identical
1201 /// inputs (1), and little-endian operations with two different inputs (2).
1202 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1203 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1204                                SelectionDAG &DAG) {
1205   bool IsLE = DAG.getDataLayout().isLittleEndian();
1206   if (ShuffleKind == 0) {
1207     if (IsLE)
1208       return false;
1209     for (unsigned i = 0; i != 16; i += 2)
1210       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1211           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1212         return false;
1213   } else if (ShuffleKind == 2) {
1214     if (!IsLE)
1215       return false;
1216     for (unsigned i = 0; i != 16; i += 2)
1217       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1218           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1219         return false;
1220   } else if (ShuffleKind == 1) {
1221     unsigned j = IsLE ? 0 : 2;
1222     for (unsigned i = 0; i != 8; i += 2)
1223       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1224           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1225           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1226           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1227         return false;
1228   }
1229   return true;
1230 }
1231 
1232 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1233 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1234 /// current subtarget.
1235 ///
1236 /// The ShuffleKind distinguishes between big-endian operations with
1237 /// two different inputs (0), either-endian operations with two identical
1238 /// inputs (1), and little-endian operations with two different inputs (2).
1239 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
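/// For example, with ShuffleKind 0 (big endian, two inputs) the expected mask
/// is <4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31>, i.e. the low-order word
/// of each doubleword of the two concatenated inputs.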
1240 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1241                                SelectionDAG &DAG) {
1242   const PPCSubtarget& Subtarget =
1243     static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1244   if (!Subtarget.hasP8Vector())
1245     return false;
1246 
1247   bool IsLE = DAG.getDataLayout().isLittleEndian();
1248   if (ShuffleKind == 0) {
1249     if (IsLE)
1250       return false;
1251     for (unsigned i = 0; i != 16; i += 4)
1252       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1253           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1254           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1255           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1256         return false;
1257   } else if (ShuffleKind == 2) {
1258     if (!IsLE)
1259       return false;
1260     for (unsigned i = 0; i != 16; i += 4)
1261       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1262           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1263           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1264           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1265         return false;
1266   } else if (ShuffleKind == 1) {
1267     unsigned j = IsLE ? 0 : 4;
1268     for (unsigned i = 0; i != 8; i += 4)
1269       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1270           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1271           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1272           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1273           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1274           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1275           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1276           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1277         return false;
1278   }
1279   return true;
1280 }
1281 
1282 /// isVMerge - Common function, used to match vmrg* shuffles.
1283 ///
1284 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1285                      unsigned LHSStart, unsigned RHSStart) {
1286   if (N->getValueType(0) != MVT::v16i8)
1287     return false;
1288   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1289          "Unsupported merge size!");
1290 
1291   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1292     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1293       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1294                              LHSStart+j+i*UnitSize) ||
1295           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1296                              RHSStart+j+i*UnitSize))
1297         return false;
1298     }
1299   return true;
1300 }
1301 
1302 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1303 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1304 /// The ShuffleKind distinguishes between big-endian merges with two
1305 /// different inputs (0), either-endian merges with two identical inputs (1),
1306 /// and little-endian merges with two different inputs (2).  For the latter,
1307 /// the input operands are swapped (see PPCInstrAltivec.td).
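/// For example, a big-endian vmrglb (UnitSize 1, ShuffleKind 0) expects the
/// mask <8,24,9,25,...,15,31>.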
1308 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1309                              unsigned ShuffleKind, SelectionDAG &DAG) {
1310   if (DAG.getDataLayout().isLittleEndian()) {
1311     if (ShuffleKind == 1) // unary
1312       return isVMerge(N, UnitSize, 0, 0);
1313     else if (ShuffleKind == 2) // swapped
1314       return isVMerge(N, UnitSize, 0, 16);
1315     else
1316       return false;
1317   } else {
1318     if (ShuffleKind == 1) // unary
1319       return isVMerge(N, UnitSize, 8, 8);
1320     else if (ShuffleKind == 0) // normal
1321       return isVMerge(N, UnitSize, 8, 24);
1322     else
1323       return false;
1324   }
1325 }
1326 
1327 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1328 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1329 /// The ShuffleKind distinguishes between big-endian merges with two
1330 /// different inputs (0), either-endian merges with two identical inputs (1),
1331 /// and little-endian merges with two different inputs (2).  For the latter,
1332 /// the input operands are swapped (see PPCInstrAltivec.td).
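/// For example, a big-endian vmrghb (UnitSize 1, ShuffleKind 0) expects the
/// mask <0,16,1,17,...,7,23>.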
1333 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1334                              unsigned ShuffleKind, SelectionDAG &DAG) {
1335   if (DAG.getDataLayout().isLittleEndian()) {
1336     if (ShuffleKind == 1) // unary
1337       return isVMerge(N, UnitSize, 8, 8);
1338     else if (ShuffleKind == 2) // swapped
1339       return isVMerge(N, UnitSize, 8, 24);
1340     else
1341       return false;
1342   } else {
1343     if (ShuffleKind == 1) // unary
1344       return isVMerge(N, UnitSize, 0, 0);
1345     else if (ShuffleKind == 0) // normal
1346       return isVMerge(N, UnitSize, 0, 16);
1347     else
1348       return false;
1349   }
1350 }
1351 
1352 /**
1353  * \brief Common function used to match vmrgew and vmrgow shuffles
1354  *
1355  * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
1357  * machine.
1358  *   - Little Endian:
1359  *     - Use offset of 0 to check for odd elements
1360  *     - Use offset of 4 to check for even elements
1361  *   - Big Endian:
1362  *     - Use offset of 0 to check for even elements
1363  *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * ("Targeting your applications - what little endian and big endian IBM XL
 * C/C++ compiler differences mean to you").
1369  *
1370  * The mask to the shuffle vector instruction specifies the indices of the
1371  * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, so each contains 16 elements of 8 bits. More
 * information on the shufflevector instruction can be found in the LLVM
 * Language Reference:
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1377  *
1378  * The RHSStartValue indicates whether the same input vectors are used (unary)
1379  * or two different input vectors are used, based on the following:
1380  *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStartValue passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStartValue passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1386  *     to 31 specify elements in the second vector).
1387  *
1388  * \param[in] N The shuffle vector SD Node to analyze
1389  * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the right-hand input
1391  * vector to the shuffle_vector instruction
1392  * \return true iff this shuffle vector represents an even or odd word merge
1393  */
1394 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1395                      unsigned RHSStartValue) {
1396   if (N->getValueType(0) != MVT::v16i8)
1397     return false;
1398 
1399   for (unsigned i = 0; i < 2; ++i)
1400     for (unsigned j = 0; j < 4; ++j)
1401       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1402                              i*RHSStartValue+j+IndexOffset) ||
1403           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1404                              i*RHSStartValue+j+IndexOffset+8))
1405         return false;
1406   return true;
1407 }
1408 
1409 /**
1410  * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
1411  * vmrgow instructions.
1412  *
1413  * \param[in] N The shuffle vector SD Node to analyze
1414  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1415  * \param[in] ShuffleKind Identify the type of merge:
1416  *   - 0 = big-endian merge with two different inputs;
1417  *   - 1 = either-endian merge with two identical inputs;
1418  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1419  *     little-endian merges).
1420  * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
 * instruction
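 * (for example, a big-endian vmrgew with two different inputs expects the
 * mask <0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27>)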
1422  */
1423 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1424                               unsigned ShuffleKind, SelectionDAG &DAG) {
1425   if (DAG.getDataLayout().isLittleEndian()) {
1426     unsigned indexOffset = CheckEven ? 4 : 0;
1427     if (ShuffleKind == 1) // Unary
1428       return isVMerge(N, indexOffset, 0);
1429     else if (ShuffleKind == 2) // swapped
1430       return isVMerge(N, indexOffset, 16);
1431     else
1432       return false;
1433   }
1434   else {
1435     unsigned indexOffset = CheckEven ? 0 : 4;
1436     if (ShuffleKind == 1) // Unary
1437       return isVMerge(N, indexOffset, 0);
1438     else if (ShuffleKind == 0) // Normal
1439       return isVMerge(N, indexOffset, 16);
1440     else
1441       return false;
1442   }
1443   return false;
1444 }
1445 
1446 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1447 /// amount, otherwise return -1.
1448 /// The ShuffleKind distinguishes between big-endian operations with two
1449 /// different inputs (0), either-endian operations with two identical inputs
1450 /// (1), and little-endian operations with two different inputs (2).  For the
1451 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
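/// For example, with ShuffleKind 0 on a big-endian target, the mask
/// <3,4,5,...,18> yields a shift amount of 3.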
1452 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1453                              SelectionDAG &DAG) {
1454   if (N->getValueType(0) != MVT::v16i8)
1455     return -1;
1456 
1457   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1458 
1459   // Find the first non-undef value in the shuffle mask.
1460   unsigned i;
1461   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1462     /*search*/;
1463 
1464   if (i == 16) return -1;  // all undef.
1465 
1466   // Otherwise, check to see if the rest of the elements are consecutively
1467   // numbered from this value.
1468   unsigned ShiftAmt = SVOp->getMaskElt(i);
1469   if (ShiftAmt < i) return -1;
1470 
1471   ShiftAmt -= i;
1472   bool isLE = DAG.getDataLayout().isLittleEndian();
1473 
1474   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1475     // Check the rest of the elements to see if they are consecutive.
1476     for (++i; i != 16; ++i)
1477       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1478         return -1;
1479   } else if (ShuffleKind == 1) {
1480     // Check the rest of the elements to see if they are consecutive.
1481     for (++i; i != 16; ++i)
1482       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1483         return -1;
1484   } else
1485     return -1;
1486 
1487   if (isLE)
1488     ShiftAmt = 16 - ShiftAmt;
1489 
1490   return ShiftAmt;
1491 }
1492 
1493 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1494 /// specifies a splat of a single element that is suitable for input to
1495 /// VSPLTB/VSPLTH/VSPLTW.
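/// For example, with EltSize 4 the mask <4,5,6,7,4,5,6,7,4,5,6,7,4,5,6,7>
/// is a splat of word 1 of the first input vector.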
1496 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1497   assert(N->getValueType(0) == MVT::v16i8 &&
1498          (EltSize == 1 || EltSize == 2 || EltSize == 4));
1499 
1500   // The consecutive indices need to specify an element, not part of two
1501   // different elements.  So abandon ship early if this isn't the case.
1502   if (N->getMaskElt(0) % EltSize != 0)
1503     return false;
1504 
1505   // This is a splat operation if each element of the permute is the same, and
1506   // if the value doesn't reference the second vector.
1507   unsigned ElementBase = N->getMaskElt(0);
1508 
1509   // FIXME: Handle UNDEF elements too!
1510   if (ElementBase >= 16)
1511     return false;
1512 
1513   // Check that the indices are consecutive, in the case of a multi-byte element
1514   // splatted with a v16i8 mask.
1515   for (unsigned i = 1; i != EltSize; ++i)
1516     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1517       return false;
1518 
1519   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1520     if (N->getMaskElt(i) < 0) continue;
1521     for (unsigned j = 0; j != EltSize; ++j)
1522       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1523         return false;
1524   }
1525   return true;
1526 }
1527 
1528 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1529                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1530 
1531   // Check that the mask is shuffling words
1532   for (unsigned i = 0; i < 4; ++i) {
1533     unsigned B0 = N->getMaskElt(i*4);
1534     unsigned B1 = N->getMaskElt(i*4+1);
1535     unsigned B2 = N->getMaskElt(i*4+2);
1536     unsigned B3 = N->getMaskElt(i*4+3);
1537     if (B0 % 4)
1538       return false;
1539     if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
1540       return false;
1541   }
1542 
1543   // Now we look at mask elements 0,4,8,12
1544   unsigned M0 = N->getMaskElt(0) / 4;
1545   unsigned M1 = N->getMaskElt(4) / 4;
1546   unsigned M2 = N->getMaskElt(8) / 4;
1547   unsigned M3 = N->getMaskElt(12) / 4;
1548   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1549   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1550 
1551   // Below, let H and L be arbitrary elements of the shuffle mask
1552   // where H is in the range [4,7] and L is in the range [0,3].
1553   // H, 1, 2, 3 or L, 5, 6, 7
1554   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1555       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1556     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1557     InsertAtByte = IsLE ? 12 : 0;
1558     Swap = M0 < 4;
1559     return true;
1560   }
1561   // 0, H, 2, 3 or 4, L, 6, 7
1562   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1563       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1564     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1565     InsertAtByte = IsLE ? 8 : 4;
1566     Swap = M1 < 4;
1567     return true;
1568   }
1569   // 0, 1, H, 3 or 4, 5, L, 7
1570   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1571       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1572     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1573     InsertAtByte = IsLE ? 4 : 8;
1574     Swap = M2 < 4;
1575     return true;
1576   }
1577   // 0, 1, 2, H or 4, 5, 6, L
1578   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1579       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1580     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1581     InsertAtByte = IsLE ? 0 : 12;
1582     Swap = M3 < 4;
1583     return true;
1584   }
1585 
1586   // If both vector operands for the shuffle are the same vector, the mask will
1587   // contain only elements from the first one and the second one will be undef.
1588   if (N->getOperand(1).isUndef()) {
1589     ShiftElts = 0;
1590     Swap = true;
1591     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1592     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1593       InsertAtByte = IsLE ? 12 : 0;
1594       return true;
1595     }
1596     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1597       InsertAtByte = IsLE ? 8 : 4;
1598       return true;
1599     }
1600     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1601       InsertAtByte = IsLE ? 4 : 8;
1602       return true;
1603     }
1604     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1605       InsertAtByte = IsLE ? 0 : 12;
1606       return true;
1607     }
1608   }
1609 
1610   return false;
1611 }
1612 
1613 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1614 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
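/// For example, for the EltSize-4 splat mask <4,5,6,7,...> this returns 1 on
/// big-endian targets and 2 on little-endian targets.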
1615 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1616                                 SelectionDAG &DAG) {
1617   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1618   assert(isSplatShuffleMask(SVOp, EltSize));
1619   if (DAG.getDataLayout().isLittleEndian())
1620     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1621   else
1622     return SVOp->getMaskElt(0) / EltSize;
1623 }
1624 
1625 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1626 /// by using a vspltis[bhw] instruction of the specified element size, return
1627 /// the constant being splatted.  The ByteSize field indicates the number of
1628 /// bytes of each element [124] -> [bhw].
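/// For example, a v8i16 build_vector whose elements are all the constant 1,
/// queried with ByteSize 2, returns the constant 1 (the vector that
/// "vspltish 1" would produce).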
1629 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1630   SDValue OpVal(nullptr, 0);
1631 
1632   // If ByteSize of the splat is bigger than the element size of the
1633   // build_vector, then we have a case where we are checking for a splat where
1634   // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
1636   unsigned EltSize = 16/N->getNumOperands();
1637   if (EltSize < ByteSize) {
1638     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
1639     SDValue UniquedVals[4];
1640     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1641 
    // See if all of the corresponding elements in the buildvector chunks agree.
1643     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1644       if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail out entirely.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

1649       if (!UniquedVals[i&(Multiple-1)].getNode())
1650         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
1651       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
1652         return SDValue();  // no match.
1653     }
1654 
1655     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
1656     // either constant or undef values that are identical for each chunk.  See
1657     // if these chunks can form into a larger vspltis*.
1658 
1659     // Check to see if all of the leading entries are either 0 or -1.  If
1660     // neither, then this won't fit into the immediate field.
1661     bool LeadingZero = true;
1662     bool LeadingOnes = true;
1663     for (unsigned i = 0; i != Multiple-1; ++i) {
1664       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
1665 
1666       LeadingZero &= isNullConstant(UniquedVals[i]);
1667       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
1668     }
1669     // Finally, check the least significant entry.
1670     if (LeadingZero) {
1671       if (!UniquedVals[Multiple-1].getNode())
1672         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
1673       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
1674       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
1675         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1676     }
1677     if (LeadingOnes) {
1678       if (!UniquedVals[Multiple-1].getNode())
1679         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
1681       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
1682         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1683     }
1684 
1685     return SDValue();
1686   }
1687 
1688   // Check to see if this buildvec has a single non-undef value in its elements.
1689   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1690     if (N->getOperand(i).isUndef()) continue;
1691     if (!OpVal.getNode())
1692       OpVal = N->getOperand(i);
1693     else if (OpVal != N->getOperand(i))
1694       return SDValue();
1695   }
1696 
1697   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
1698 
1699   unsigned ValSizeInBytes = EltSize;
1700   uint64_t Value = 0;
1701   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1702     Value = CN->getZExtValue();
1703   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1704     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
1705     Value = FloatToBits(CN->getValueAPF().convertToFloat());
1706   }
1707 
  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only value whose replicated bits would fit in our
  // immediate field is zero, and we prefer to use vxor for that.
1711   if (ValSizeInBytes < ByteSize) return SDValue();
1712 
1713   // If the element value is larger than the splat value, check if it consists
1714   // of a repeated bit pattern of size ByteSize.
1715   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
1716     return SDValue();
1717 
1718   // Properly sign extend the value.
1719   int MaskVal = SignExtend32(Value, ByteSize * 8);
1720 
1721   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
1722   if (MaskVal == 0) return SDValue();
1723 
1724   // Finally, if this value fits in a 5 bit sext field, return it
1725   if (SignExtend32<5>(MaskVal) == MaskVal)
1726     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
1727   return SDValue();
1728 }
1729 
1730 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1731 /// amount, otherwise return -1.
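/// For example, the mask <2,3,4,5> yields a shift amount of 2.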
1732 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
1733   EVT VT = N->getValueType(0);
1734   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
1735     return -1;
1736 
1737   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1738 
1739   // Find the first non-undef value in the shuffle mask.
1740   unsigned i;
1741   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
1742     /*search*/;
1743 
1744   if (i == 4) return -1;  // all undef.
1745 
1746   // Otherwise, check to see if the rest of the elements are consecutively
1747   // numbered from this value.
1748   unsigned ShiftAmt = SVOp->getMaskElt(i);
1749   if (ShiftAmt < i) return -1;
1750   ShiftAmt -= i;
1751 
1752   // Check the rest of the elements to see if they are consecutive.
1753   for (++i; i != 4; ++i)
1754     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1755       return -1;
1756 
1757   return ShiftAmt;
1758 }
1759 
1760 //===----------------------------------------------------------------------===//
1761 //  Addressing Mode Selection
1762 //===----------------------------------------------------------------------===//
1763 
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and stores
/// the immediate in Imm.
1768 static bool isIntS16Immediate(SDNode *N, short &Imm) {
1769   if (!isa<ConstantSDNode>(N))
1770     return false;
1771 
1772   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
1773   if (N->getValueType(0) == MVT::i32)
1774     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
1775   else
1776     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
1777 }
1778 static bool isIntS16Immediate(SDValue Op, short &Imm) {
1779   return isIntS16Immediate(Op.getNode(), Imm);
1780 }
1781 
/// SelectAddressRegReg - Given the specified address, check to see if it
1783 /// can be represented as an indexed [r+r] operation.  Returns false if it
1784 /// can be more efficiently represented with [r+imm].
1785 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
1786                                             SDValue &Index,
1787                                             SelectionDAG &DAG) const {
1788   short imm = 0;
1789   if (N.getOpcode() == ISD::ADD) {
1790     if (isIntS16Immediate(N.getOperand(1), imm))
1791       return false;    // r+i
1792     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
1793       return false;    // r+i
1794 
1795     Base = N.getOperand(0);
1796     Index = N.getOperand(1);
1797     return true;
1798   } else if (N.getOpcode() == ISD::OR) {
1799     if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i; prefer the immediate form when it fits.
1801 
1802     // If this is an or of disjoint bitfields, we can codegen this as an add
1803     // (for better address arithmetic) if the LHS and RHS of the OR are provably
1804     // disjoint.
1805     APInt LHSKnownZero, LHSKnownOne;
1806     APInt RHSKnownZero, RHSKnownOne;
1807     DAG.computeKnownBits(N.getOperand(0),
1808                          LHSKnownZero, LHSKnownOne);
1809 
1810     if (LHSKnownZero.getBoolValue()) {
1811       DAG.computeKnownBits(N.getOperand(1),
1812                            RHSKnownZero, RHSKnownOne);
1813       // If all of the bits are known zero on the LHS or RHS, the add won't
1814       // carry.
1815       if (~(LHSKnownZero | RHSKnownZero) == 0) {
1816         Base = N.getOperand(0);
1817         Index = N.getOperand(1);
1818         return true;
1819       }
1820     }
1821   }
1822 
1823   return false;
1824 }
1825 
1826 // If we happen to be doing an i64 load or store into a stack slot that has
1827 // less than a 4-byte alignment, then the frame-index elimination may need to
1828 // use an indexed load or store instruction (because the offset may not be a
1829 // multiple of 4). The extra register needed to hold the offset comes from the
1830 // register scavenger, and it is possible that the scavenger will need to use
1831 // an emergency spill slot. As a result, we need to make sure that a spill slot
1832 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
1833 // stack slot.
1834 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
1835   // FIXME: This does not handle the LWA case.
1836   if (VT != MVT::i64)
1837     return;
1838 
1839   // NOTE: We'll exclude negative FIs here, which come from argument
1840   // lowering, because there are no known test cases triggering this problem
1841   // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. This is so test-case driven because the entire
  // 'fixup' exists only to prevent crashes (from the register scavenger) on
  // not-really-valid inputs. For example, if we have:
1845   //   %a = alloca i1
1846   //   %b = bitcast i1* %a to i64*
  //   store i64 0, i64* %b
1848   // then the store should really be marked as 'align 1', but is not. If it
1849   // were marked as 'align 1' then the indexed form would have been
1850   // instruction-selected initially, and the problem this 'fixup' is preventing
1851   // won't happen regardless.
1852   if (FrameIdx < 0)
1853     return;
1854 
1855   MachineFunction &MF = DAG.getMachineFunction();
1856   MachineFrameInfo &MFI = MF.getFrameInfo();
1857 
1858   unsigned Align = MFI.getObjectAlignment(FrameIdx);
1859   if (Align >= 4)
1860     return;
1861 
1862   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1863   FuncInfo->setHasNonRISpills();
1864 }
1865 
1866 /// Returns true if the address N can be represented by a base register plus
1867 /// a signed 16-bit displacement [r+imm], and if it is not better
1868 /// represented as reg+reg.  If Aligned is true, only accept displacements
1869 /// suitable for STD and friends, i.e. multiples of 4.
1870 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
1871                                             SDValue &Base,
1872                                             SelectionDAG &DAG,
1873                                             bool Aligned) const {
1874   // FIXME dl should come from parent load or store, not from address
1875   SDLoc dl(N);
1876   // If this can be more profitably realized as r+r, fail.
1877   if (SelectAddressRegReg(N, Disp, Base, DAG))
1878     return false;
1879 
1880   if (N.getOpcode() == ISD::ADD) {
1881     short imm = 0;
1882     if (isIntS16Immediate(N.getOperand(1), imm) &&
1883         (!Aligned || (imm & 3) == 0)) {
1884       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
1885       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1886         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1887         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1888       } else {
1889         Base = N.getOperand(0);
1890       }
1891       return true; // [r+i]
1892     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
1893       // Match LOAD (ADD (X, Lo(G))).
1894       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
1895              && "Cannot handle constant offsets yet!");
1896       Disp = N.getOperand(1).getOperand(0);  // The global address.
1897       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
1898              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
1899              Disp.getOpcode() == ISD::TargetConstantPool ||
1900              Disp.getOpcode() == ISD::TargetJumpTable);
1901       Base = N.getOperand(0);
1902       return true;  // [&g+r]
1903     }
1904   } else if (N.getOpcode() == ISD::OR) {
1905     short imm = 0;
1906     if (isIntS16Immediate(N.getOperand(1), imm) &&
1907         (!Aligned || (imm & 3) == 0)) {
1908       // If this is an or of disjoint bitfields, we can codegen this as an add
1909       // (for better address arithmetic) if the LHS and RHS of the OR are
1910       // provably disjoint.
1911       APInt LHSKnownZero, LHSKnownOne;
1912       DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
1913 
1914       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
1915         // If all of the bits are known zero on the LHS or RHS, the add won't
1916         // carry.
1917         if (FrameIndexSDNode *FI =
1918               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1919           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1920           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1921         } else {
1922           Base = N.getOperand(0);
1923         }
1924         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
1925         return true;
1926       }
1927     }
1928   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1929     // Loading from a constant address.
1930 
1931     // If this address fits entirely in a 16-bit sext immediate field, codegen
1932     // this as "d, 0"
1933     short Imm;
1934     if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
1935       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
1936       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1937                              CN->getValueType(0));
1938       return true;
1939     }
1940 
1941     // Handle 32-bit sext immediates with LIS + addr mode.
1942     if ((CN->getValueType(0) == MVT::i32 ||
1943          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
1944         (!Aligned || (CN->getZExtValue() & 3) == 0)) {
1945       int Addr = (int)CN->getZExtValue();
1946 
1947       // Otherwise, break this down into an LIS + disp.
1948       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
1949 
1950       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
1951                                    MVT::i32);
1952       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
1953       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
1954       return true;
1955     }
1956   }
1957 
1958   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
1959   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
1960     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1961     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1962   } else
1963     Base = N;
1964   return true;      // [r+0]
1965 }
1966 
/// SelectAddressRegRegOnly - Given the specified address, force it to be
1968 /// represented as an indexed [r+r] operation.
1969 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
1970                                                 SDValue &Index,
1971                                                 SelectionDAG &DAG) const {
1972   // Check to see if we can easily represent this as an [r+r] address.  This
1973   // will fail if it thinks that the address is more profitably represented as
1974   // reg+imm, e.g. where imm = 0.
1975   if (SelectAddressRegReg(N, Base, Index, DAG))
1976     return true;
1977 
1978   // If the operand is an addition, always emit this as [r+r], since this is
1979   // better (for code size, and execution, as the memop does the add for free)
1980   // than emitting an explicit add.
1981   if (N.getOpcode() == ISD::ADD) {
1982     Base = N.getOperand(0);
1983     Index = N.getOperand(1);
1984     return true;
1985   }
1986 
1987   // Otherwise, do it the hard way, using R0 as the base register.
1988   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1989                          N.getValueType());
1990   Index = N;
1991   return true;
1992 }
1993 
/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
/// offset pointer, and addressing mode by reference, if the node's address
/// can be legally represented as a pre-indexed load/store address.
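/// For example, a load whose address is (add r, 16) may be selected as a
/// pre-increment form such as lwzu, which loads from r+16 and writes the
/// updated address back into r.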
1997 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1998                                                   SDValue &Offset,
1999                                                   ISD::MemIndexedMode &AM,
2000                                                   SelectionDAG &DAG) const {
2001   if (DisablePPCPreinc) return false;
2002 
2003   bool isLoad = true;
2004   SDValue Ptr;
2005   EVT VT;
2006   unsigned Alignment;
2007   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2008     Ptr = LD->getBasePtr();
2009     VT = LD->getMemoryVT();
2010     Alignment = LD->getAlignment();
2011   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2012     Ptr = ST->getBasePtr();
2013     VT  = ST->getMemoryVT();
2014     Alignment = ST->getAlignment();
2015     isLoad = false;
2016   } else
2017     return false;
2018 
2019   // PowerPC doesn't have preinc load/store instructions for vectors (except
2020   // for QPX, which does have preinc r+r forms).
2021   if (VT.isVector()) {
2022     if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2023       return false;
2024     } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2025       AM = ISD::PRE_INC;
2026       return true;
2027     }
2028   }
2029 
2030   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2031 
2032     // Common code will reject creating a pre-inc form if the base pointer
2033     // is a frame index, or if N is a store and the base pointer is either
2034     // the same as or a predecessor of the value being stored.  Check for
2035     // those situations here, and try with swapped Base/Offset instead.
2036     bool Swap = false;
2037 
2038     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2039       Swap = true;
2040     else if (!isLoad) {
2041       SDValue Val = cast<StoreSDNode>(N)->getValue();
2042       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2043         Swap = true;
2044     }
2045 
2046     if (Swap)
2047       std::swap(Base, Offset);
2048 
2049     AM = ISD::PRE_INC;
2050     return true;
2051   }
2052 
  // LDU/STDU (the doubleword forms) can only handle immediates that are a
  // multiple of 4.
2054   if (VT != MVT::i64) {
2055     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
2056       return false;
2057   } else {
2058     // LDU/STU need an address with at least 4-byte alignment.
2059     if (Alignment < 4)
2060       return false;
2061 
2062     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
2063       return false;
2064   }
2065 
2066   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2067     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2068     // sext i32 to i64 when addr mode is r+i.
2069     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2070         LD->getExtensionType() == ISD::SEXTLOAD &&
2071         isa<ConstantSDNode>(Offset))
2072       return false;
2073   }
2074 
2075   AM = ISD::PRE_INC;
2076   return true;
2077 }
2078 
2079 //===----------------------------------------------------------------------===//
2080 //  LowerOperation implementation
2081 //===----------------------------------------------------------------------===//
2082 
/// Set the HiOpFlags and LoOpFlags to the target MO flags for label
/// references, taking the relocation model and the referenced global (if any)
/// into account.
2085 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2086                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2087                                const GlobalValue *GV = nullptr) {
2088   HiOpFlags = PPCII::MO_HA;
2089   LoOpFlags = PPCII::MO_LO;
2090 
2091   // Don't use the pic base if not in PIC relocation model.
2092   if (IsPIC) {
2093     HiOpFlags |= PPCII::MO_PIC_FLAG;
2094     LoOpFlags |= PPCII::MO_PIC_FLAG;
2095   }
2096 
2097   // If this is a reference to a global value that requires a non-lazy-ptr, make
2098   // sure that instruction lowering adds it.
2099   if (GV && Subtarget.hasLazyResolverStub(GV)) {
2100     HiOpFlags |= PPCII::MO_NLP_FLAG;
2101     LoOpFlags |= PPCII::MO_NLP_FLAG;
2102 
2103     if (GV->hasHiddenVisibility()) {
2104       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2105       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2106     }
2107   }
2108 }
2109 
2110 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2111                              SelectionDAG &DAG) {
2112   SDLoc DL(HiPart);
2113   EVT PtrVT = HiPart.getValueType();
2114   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2115 
2116   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2117   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2118 
2119   // With PIC, the first instruction is actually "GR+hi(&G)".
2120   if (isPIC)
2121     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2122                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2123 
2124   // Generate non-pic code that has direct accesses to the constant pool.
2125   // The address of the global is just (hi(&g)+lo(&g)).
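  // On 32-bit ELF targets this typically materializes as a lis of the @ha
  // part followed by an addi of the @l part.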
2126   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2127 }
2128 
2129 static void setUsesTOCBasePtr(MachineFunction &MF) {
2130   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2131   FuncInfo->setUsesTOCBasePtr();
2132 }
2133 
2134 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2135   setUsesTOCBasePtr(DAG.getMachineFunction());
2136 }
2137 
2138 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2139                            SDValue GA) {
2140   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2141   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2142                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2143 
2144   SDValue Ops[] = { GA, Reg };
2145   return DAG.getMemIntrinsicNode(
2146       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2147       MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
2148       false, 0);
2149 }
2150 
2151 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2152                                              SelectionDAG &DAG) const {
2153   EVT PtrVT = Op.getValueType();
2154   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2155   const Constant *C = CP->getConstVal();
2156 
2157   // 64-bit SVR4 ABI code is always position-independent.
2158   // The actual address of the GlobalValue is stored in the TOC.
2159   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2160     setUsesTOCBasePtr(DAG);
2161     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2162     return getTOCEntry(DAG, SDLoc(CP), true, GA);
2163   }
2164 
2165   unsigned MOHiFlag, MOLoFlag;
2166   bool IsPIC = isPositionIndependent();
2167   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2168 
2169   if (IsPIC && Subtarget.isSVR4ABI()) {
2170     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2171                                            PPCII::MO_PIC_FLAG);
2172     return getTOCEntry(DAG, SDLoc(CP), false, GA);
2173   }
2174 
2175   SDValue CPIHi =
2176     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2177   SDValue CPILo =
2178     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2179   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2180 }
2181 
2182 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2183   EVT PtrVT = Op.getValueType();
2184   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2185 
2186   // 64-bit SVR4 ABI code is always position-independent.
2187   // The actual address of the GlobalValue is stored in the TOC.
2188   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2189     setUsesTOCBasePtr(DAG);
2190     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2191     return getTOCEntry(DAG, SDLoc(JT), true, GA);
2192   }
2193 
2194   unsigned MOHiFlag, MOLoFlag;
2195   bool IsPIC = isPositionIndependent();
2196   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2197 
2198   if (IsPIC && Subtarget.isSVR4ABI()) {
2199     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2200                                         PPCII::MO_PIC_FLAG);
2201     return getTOCEntry(DAG, SDLoc(GA), false, GA);
2202   }
2203 
2204   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2205   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2206   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2207 }
2208 
2209 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2210                                              SelectionDAG &DAG) const {
2211   EVT PtrVT = Op.getValueType();
2212   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2213   const BlockAddress *BA = BASDN->getBlockAddress();
2214 
2215   // 64-bit SVR4 ABI code is always position-independent.
2216   // The actual BlockAddress is stored in the TOC.
2217   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2218     setUsesTOCBasePtr(DAG);
2219     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2220     return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2221   }
2222 
2223   unsigned MOHiFlag, MOLoFlag;
2224   bool IsPIC = isPositionIndependent();
2225   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2226   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2227   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2228   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2229 }
2230 
2231 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2232                                               SelectionDAG &DAG) const {
2233 
2234   // FIXME: TLS addresses currently use medium model code sequences,
2235   // which is the most useful form.  Eventually support for small and
2236   // large models could be added if users need it, at the cost of
2237   // additional complexity.
2238   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2239   if (DAG.getTarget().Options.EmulatedTLS)
2240     return LowerToTLSEmulatedModel(GA, DAG);
2241 
2242   SDLoc dl(GA);
2243   const GlobalValue *GV = GA->getGlobal();
2244   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2245   bool is64bit = Subtarget.isPPC64();
2246   const Module *M = DAG.getMachineFunction().getFunction()->getParent();
2247   PICLevel::Level picLevel = M->getPICLevel();
2248 
2249   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2250 
2251   if (Model == TLSModel::LocalExec) {
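    // Local-exec model: the variable's offset from the thread pointer (r13 on
    // 64-bit, r2 on 32-bit) is known at link time, so it is added to the
    // thread pointer directly, roughly:
    //   addis rX, rTP, sym@tprel@ha
    //   addi  rX, rX,  sym@tprel@l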
2252     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2253                                                PPCII::MO_TPREL_HA);
2254     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2255                                                PPCII::MO_TPREL_LO);
2256     SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
2257                                      is64bit ? MVT::i64 : MVT::i32);
2258     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2259     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2260   }
2261 
2262   if (Model == TLSModel::InitialExec) {
2263     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2264     SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2265                                                 PPCII::MO_TLS);
2266     SDValue GOTPtr;
2267     if (is64bit) {
2268       setUsesTOCBasePtr(DAG);
2269       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2270       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2271                            PtrVT, GOTReg, TGA);
2272     } else
2273       GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2274     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2275                                    PtrVT, TGA, GOTPtr);
2276     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2277   }
2278 
2279   if (Model == TLSModel::GeneralDynamic) {
2280     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2281     SDValue GOTPtr;
2282     if (is64bit) {
2283       setUsesTOCBasePtr(DAG);
2284       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2285       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2286                                    GOTReg, TGA);
2287     } else {
2288       if (picLevel == PICLevel::SmallPIC)
2289         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2290       else
2291         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2292     }
2293     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2294                        GOTPtr, TGA, TGA);
2295   }
2296 
2297   if (Model == TLSModel::LocalDynamic) {
2298     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2299     SDValue GOTPtr;
2300     if (is64bit) {
2301       setUsesTOCBasePtr(DAG);
2302       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2303       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2304                            GOTReg, TGA);
2305     } else {
2306       if (picLevel == PICLevel::SmallPIC)
2307         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2308       else
2309         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2310     }
2311     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2312                                   PtrVT, GOTPtr, TGA, TGA);
2313     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2314                                       PtrVT, TLSAddr, TGA);
2315     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2316   }
2317 
2318   llvm_unreachable("Unknown TLS model!");
2319 }
2320 
2321 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2322                                               SelectionDAG &DAG) const {
2323   EVT PtrVT = Op.getValueType();
2324   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2325   SDLoc DL(GSDN);
2326   const GlobalValue *GV = GSDN->getGlobal();
2327 
2328   // 64-bit SVR4 ABI code is always position-independent.
2329   // The actual address of the GlobalValue is stored in the TOC.
2330   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2331     setUsesTOCBasePtr(DAG);
2332     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2333     return getTOCEntry(DAG, DL, true, GA);
2334   }
2335 
2336   unsigned MOHiFlag, MOLoFlag;
2337   bool IsPIC = isPositionIndependent();
2338   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2339 
2340   if (IsPIC && Subtarget.isSVR4ABI()) {
2341     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2342                                             GSDN->getOffset(),
2343                                             PPCII::MO_PIC_FLAG);
2344     return getTOCEntry(DAG, DL, false, GA);
2345   }
2346 
2347   SDValue GAHi =
2348     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2349   SDValue GALo =
2350     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2351 
2352   SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2353 
2354   // If the global reference is actually to a non-lazy-pointer, we have to do an
2355   // extra load to get the address of the global.
2356   if (MOHiFlag & PPCII::MO_NLP_FLAG)
2357     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2358   return Ptr;
2359 }
2360 
2361 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2362   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2363   SDLoc dl(Op);
2364 
2365   if (Op.getValueType() == MVT::v2i64) {
2366     // When the operands themselves are v2i64 values, we need to do something
2367     // special because VSX has no underlying comparison operations for these.
2368     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2369       // Equality can be handled by casting to the legal type for Altivec
2370       // comparisons, everything else needs to be expanded.
2371       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2372         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2373                  DAG.getSetCC(dl, MVT::v4i32,
2374                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2375                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2376                    CC));
2377       }
2378 
2379       return SDValue();
2380     }
2381 
2382     // We handle most of these in the usual way.
2383     return Op;
2384   }
2385 
2386   // If we're comparing for equality to zero, expose the fact that this is
2387   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2388   // fold the new nodes.
2389   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2390     return V;
2391 
2392   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2393     // Leave comparisons against 0 and -1 alone for now, since they're usually
2394     // optimized.  FIXME: revisit this when we can custom lower all setcc
2395     // optimizations.
2396     if (C->isAllOnesValue() || C->isNullValue())
2397       return SDValue();
2398   }
2399 
2400   // If we have an integer seteq/setne, turn it into a compare against zero
2401   // by xor'ing the rhs with the lhs, which is faster than setting a
2402   // condition register, reading it back out, and masking the correct bit.  The
2403   // normal approach here uses sub to do this instead of xor.  Using xor exposes
2404   // the result to other bit-twiddling opportunities.
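  // For example, (seteq a, b) becomes (seteq (xor a, b), 0).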
2405   EVT LHSVT = Op.getOperand(0).getValueType();
2406   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2407     EVT VT = Op.getValueType();
2408     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2409                                 Op.getOperand(1));
2410     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2411   }
2412   return SDValue();
2413 }
2414 
2415 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2416   SDNode *Node = Op.getNode();
2417   EVT VT = Node->getValueType(0);
2418   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2419   SDValue InChain = Node->getOperand(0);
2420   SDValue VAListPtr = Node->getOperand(1);
2421   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2422   SDLoc dl(Node);
2423 
2424   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
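
  // The PPC32 SVR4 va_list layout assumed by the offsets below is roughly:
  //   struct {
  //     unsigned char gpr;        // offset 0: index of the next GPR (r3-r10)
  //     unsigned char fpr;        // offset 1: index of the next FPR (f1-f8)
  //     unsigned short reserved;  // offset 2: padding
  //     void *overflow_arg_area;  // offset 4
  //     void *reg_save_area;      // offset 8
  //   };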
2425 
2426   // gpr_index
2427   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2428                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
2429   InChain = GprIndex.getValue(1);
2430 
2431   if (VT == MVT::i64) {
2432     // Check if GprIndex is even
2433     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2434                                  DAG.getConstant(1, dl, MVT::i32));
2435     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2436                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2437     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2438                                           DAG.getConstant(1, dl, MVT::i32));
2439     // Align GprIndex to be even if it isn't
2440     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2441                            GprIndex);
2442   }
2443 
2444   // fpr index is 1 byte after gpr
2445   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2446                                DAG.getConstant(1, dl, MVT::i32));
2447 
2448   // fpr
2449   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2450                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
2451   InChain = FprIndex.getValue(1);
2452 
2453   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2454                                        DAG.getConstant(8, dl, MVT::i32));
2455 
2456   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2457                                         DAG.getConstant(4, dl, MVT::i32));
2458 
2459   // areas
2460   SDValue OverflowArea =
2461       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
2462   InChain = OverflowArea.getValue(1);
2463 
2464   SDValue RegSaveArea =
2465       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
2466   InChain = RegSaveArea.getValue(1);
2467 
  // select overflow_area if index >= 8
2469   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2470                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2471 
2472   // adjustment constant gpr_index * 4/8
2473   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2474                                     VT.isInteger() ? GprIndex : FprIndex,
2475                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2476                                                     MVT::i32));
2477 
2478   // OurReg = RegSaveArea + RegConstant
2479   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2480                                RegConstant);
2481 
2482   // Floating types are 32 bytes into RegSaveArea
2483   if (VT.isFloatingPoint())
2484     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2485                          DAG.getConstant(32, dl, MVT::i32));
2486 
2487   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2488   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2489                                    VT.isInteger() ? GprIndex : FprIndex,
2490                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2491                                                    MVT::i32));
2492 
2493   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2494                               VT.isInteger() ? VAListPtr : FprPtr,
2495                               MachinePointerInfo(SV), MVT::i8);
2496 
2497   // determine if we should load from reg_save_area or overflow_area
2498   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2499 
2500   // increase overflow_area by 4/8 if gpr/fpr index >= 8
2501   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2502                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
2503                                           dl, MVT::i32));
2504 
2505   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2506                              OverflowAreaPlusN);
2507 
2508   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
2509                               MachinePointerInfo(), MVT::i32);
2510 
2511   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
2512 }
2513 
2514 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2515   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2516 
2517   // We have to copy the entire va_list struct:
2518   // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
2519   return DAG.getMemcpy(Op.getOperand(0), Op,
2520                        Op.getOperand(1), Op.getOperand(2),
2521                        DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2522                        false, MachinePointerInfo(), MachinePointerInfo());
2523 }
2524 
2525 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2526                                                   SelectionDAG &DAG) const {
2527   return Op.getOperand(0);
2528 }
2529 
2530 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2531                                                 SelectionDAG &DAG) const {
2532   SDValue Chain = Op.getOperand(0);
2533   SDValue Trmp = Op.getOperand(1); // trampoline
2534   SDValue FPtr = Op.getOperand(2); // nested function
2535   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2536   SDLoc dl(Op);
2537 
2538   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2539   bool isPPC64 = (PtrVT == MVT::i64);
2540   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2541 
2542   TargetLowering::ArgListTy Args;
2543   TargetLowering::ArgListEntry Entry;
2544 
2545   Entry.Ty = IntPtrTy;
2546   Entry.Node = Trmp; Args.push_back(Entry);
2547 
2548   // TrampSize == (isPPC64 ? 48 : 40);
2549   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2550                                isPPC64 ? MVT::i64 : MVT::i32);
2551   Args.push_back(Entry);
2552 
2553   Entry.Node = FPtr; Args.push_back(Entry);
2554   Entry.Node = Nest; Args.push_back(Entry);
2555 
2556   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2557   TargetLowering::CallLoweringInfo CLI(DAG);
2558   CLI.setDebugLoc(dl).setChain(Chain)
2559     .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2560                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
2561                std::move(Args));
2562 
2563   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2564   return CallResult.second;
2565 }
2566 
2567 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2568   MachineFunction &MF = DAG.getMachineFunction();
2569   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2570   EVT PtrVT = getPointerTy(MF.getDataLayout());
2571 
2572   SDLoc dl(Op);
2573 
2574   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2575     // vastart just stores the address of the VarArgsFrameIndex slot into the
2576     // memory location argument.
2577     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2578     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2579     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2580                         MachinePointerInfo(SV));
2581   }
2582 
2583   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2584   // We suppose the given va_list is already allocated.
2585   //
2586   // typedef struct {
2587   //  char gpr;     /* index into the array of 8 GPRs
2588   //                 * stored in the register save area
2589   //                 * gpr=0 corresponds to r3,
2590   //                 * gpr=1 to r4, etc.
2591   //                 */
2592   //  char fpr;     /* index into the array of 8 FPRs
2593   //                 * stored in the register save area
2594   //                 * fpr=0 corresponds to f1,
2595   //                 * fpr=1 to f2, etc.
2596   //                 */
2597   //  char *overflow_arg_area;
2598   //                /* location on stack that holds
2599   //                 * the next overflow argument
2600   //                 */
2601   //  char *reg_save_area;
2602   //               /* where r3:r10 and f1:f8 (if saved)
2603   //                * are stored
2604   //                */
2605   // } va_list[1];
2606 
2607   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2608   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2609   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2610                                             PtrVT);
2611   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2612                                  PtrVT);
2613 
2614   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2615   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2616 
2617   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2618   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2619 
2620   uint64_t FPROffset = 1;
2621   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
2622 
2623   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2624 
2625   // Store first byte : number of int regs
2626   SDValue firstStore =
2627       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
2628                         MachinePointerInfo(SV), MVT::i8);
2629   uint64_t nextOffset = FPROffset;
2630   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2631                                   ConstFPROffset);
2632 
2633   // Store second byte : number of float regs
2634   SDValue secondStore =
2635       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2636                         MachinePointerInfo(SV, nextOffset), MVT::i8);
2637   nextOffset += StackOffset;
2638   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2639 
2640   // Store second word : arguments given on stack
2641   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2642                                     MachinePointerInfo(SV, nextOffset));
2643   nextOffset += FrameOffset;
2644   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2645 
2646   // Store third word : arguments given in registers
2647   return DAG.getStore(thirdStore, dl, FR, nextPtr,
2648                       MachinePointerInfo(SV, nextOffset));
2649 }
2650 
2651 #include "PPCGenCallingConv.inc"
2652 
2653 // Function whose sole purpose is to kill compiler warnings
2654 // stemming from unused functions included from PPCGenCallingConv.inc.
2655 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2656   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2657 }
2658 
2659 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2660                                       CCValAssign::LocInfo &LocInfo,
2661                                       ISD::ArgFlagsTy &ArgFlags,
2662                                       CCState &State) {
2663   return true;
2664 }
2665 
2666 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2667                                              MVT &LocVT,
2668                                              CCValAssign::LocInfo &LocInfo,
2669                                              ISD::ArgFlagsTy &ArgFlags,
2670                                              CCState &State) {
2671   static const MCPhysReg ArgRegs[] = {
2672     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2673     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2674   };
2675   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2676 
2677   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2678 
2679   // Skip one register if the first unallocated register has an even register
2680   // number and there are still argument registers available which have not been
2681   // allocated yet. RegNum is actually an index into ArgRegs, which means we
2682   // need to skip a register if RegNum is odd.
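  // For example, if only R3 has been allocated so far (RegNum == 1), R4 is
  // allocated here and skipped, so the 64-bit argument starts in the R5:R6
  // pair; if RegNum is already even, nothing is skipped.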
2683   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2684     State.AllocateReg(ArgRegs[RegNum]);
2685   }
2686 
2687   // Always return false here, as this function only makes sure that the first
2688   // unallocated register has an odd register number and does not actually
2689   // allocate a register for the current argument.
2690   return false;
2691 }
2692 
2693 bool
2694 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
2695                                                   MVT &LocVT,
2696                                                   CCValAssign::LocInfo &LocInfo,
2697                                                   ISD::ArgFlagsTy &ArgFlags,
2698                                                   CCState &State) {
2699   static const MCPhysReg ArgRegs[] = {
2700     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2701     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2702   };
2703   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2704 
2705   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2706   int RegsLeft = NumArgRegs - RegNum;
2707 
2708   // If there are not enough registers left for the long double type (4 GPRs
2709   // in soft-float mode), skip them and pass the long double on the stack.
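  // For example, with only R9 and R10 left (RegsLeft == 2), both are
  // allocated here and the whole long double ends up on the stack.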
2710   if (RegNum != NumArgRegs && RegsLeft < 4) {
2711     for (int i = 0; i < RegsLeft; i++) {
2712       State.AllocateReg(ArgRegs[RegNum + i]);
2713     }
2714   }
2715 
2716   return false;
2717 }
2718 
2719 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
2720                                                MVT &LocVT,
2721                                                CCValAssign::LocInfo &LocInfo,
2722                                                ISD::ArgFlagsTy &ArgFlags,
2723                                                CCState &State) {
2724   static const MCPhysReg ArgRegs[] = {
2725     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2726     PPC::F8
2727   };
2728 
2729   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2730 
2731   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2732 
2733   // If there is only one Floating-point register left we need to put both f64
2734   // values of a split ppc_fp128 value on the stack.
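  // For example, if only F8 remains it is allocated here (and left unused),
  // so neither f64 half of the ppc_fp128 ends up in a register.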
2735   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
2736     State.AllocateReg(ArgRegs[RegNum]);
2737   }
2738 
2739   // Always return false here, as this function only makes sure that the two f64
2740   // values a ppc_fp128 value is split into are both passed in registers or both
2741   // passed on the stack and does not actually allocate a register for the
2742   // current argument.
2743   return false;
2744 }
2745 
2746 /// FPR - The set of FP registers that should be allocated for arguments,
2747 /// on Darwin.
2748 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
2749                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
2750                                 PPC::F11, PPC::F12, PPC::F13};
2751 
2752 /// QFPR - The set of QPX registers that should be allocated for arguments.
2753 static const MCPhysReg QFPR[] = {
2754     PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
2755     PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
2756 
2757 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
2758 /// the stack.
2759 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
2760                                        unsigned PtrByteSize) {
2761   unsigned ArgSize = ArgVT.getStoreSize();
2762   if (Flags.isByVal())
2763     ArgSize = Flags.getByValSize();
2764 
2765   // Round up to multiples of the pointer size, except for array members,
2766   // which are always packed.
2767   if (!Flags.isInConsecutiveRegs())
2768     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2769 
2770   return ArgSize;
2771 }
2772 
2773 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
2774 /// on the stack.
2775 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
2776                                             ISD::ArgFlagsTy Flags,
2777                                             unsigned PtrByteSize) {
2778   unsigned Align = PtrByteSize;
2779 
2780   // Altivec parameters are padded to a 16 byte boundary.
2781   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2782       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2783       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2784       ArgVT == MVT::v1i128)
2785     Align = 16;
2786   // QPX vector types stored in double-precision are padded to a 32 byte
2787   // boundary.
2788   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
2789     Align = 32;
2790 
2791   // ByVal parameters are aligned as requested.
2792   if (Flags.isByVal()) {
2793     unsigned BVAlign = Flags.getByValAlign();
2794     if (BVAlign > PtrByteSize) {
2795       if (BVAlign % PtrByteSize != 0)
2796           llvm_unreachable(
2797             "ByVal alignment is not a multiple of the pointer size");
2798 
2799       Align = BVAlign;
2800     }
2801   }
2802 
2803   // Array members are always packed to their original alignment.
2804   if (Flags.isInConsecutiveRegs()) {
2805     // If the array member was split into multiple registers, the first
2806     // needs to be aligned to the size of the full type.  (Except for
2807     // ppcf128, which is only aligned as its f64 components.)
2808     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
2809       Align = OrigVT.getStoreSize();
2810     else
2811       Align = ArgVT.getStoreSize();
2812   }
2813 
2814   return Align;
2815 }
2816 
2817 /// CalculateStackSlotUsed - Return whether this argument will use its
2818 /// stack slot (instead of being passed in registers).  ArgOffset,
2819 /// AvailableFPRs, and AvailableVRs must hold the current argument
2820 /// position, and will be updated to account for this argument.
2821 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
2822                                    ISD::ArgFlagsTy Flags,
2823                                    unsigned PtrByteSize,
2824                                    unsigned LinkageSize,
2825                                    unsigned ParamAreaSize,
2826                                    unsigned &ArgOffset,
2827                                    unsigned &AvailableFPRs,
2828                                    unsigned &AvailableVRs, bool HasQPX) {
2829   bool UseMemory = false;
2830 
2831   // Respect alignment of argument on the stack.
2832   unsigned Align =
2833     CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
2834   ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2835   // If there's no space left in the argument save area, we must
2836   // use memory (this check also catches zero-sized arguments).
2837   if (ArgOffset >= LinkageSize + ParamAreaSize)
2838     UseMemory = true;
2839 
2840   // Allocate argument on the stack.
2841   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2842   if (Flags.isInConsecutiveRegsLast())
2843     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2844   // If we overran the argument save area, we must use memory
2845   // (this check catches arguments passed partially in memory)
2846   if (ArgOffset > LinkageSize + ParamAreaSize)
2847     UseMemory = true;
2848 
2849   // However, if the argument is actually passed in an FPR or a VR,
2850   // we don't use memory after all.
2851   if (!Flags.isByVal()) {
2852     if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
2853         // QPX registers overlap with the scalar FP registers.
2854         (HasQPX && (ArgVT == MVT::v4f32 ||
2855                     ArgVT == MVT::v4f64 ||
2856                     ArgVT == MVT::v4i1)))
2857       if (AvailableFPRs > 0) {
2858         --AvailableFPRs;
2859         return false;
2860       }
2861     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2862         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2863         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2864         ArgVT == MVT::v1i128)
2865       if (AvailableVRs > 0) {
2866         --AvailableVRs;
2867         return false;
2868       }
2869   }
2870 
2871   return UseMemory;
2872 }
2873 
2874 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
2875 /// ensure minimum alignment required for target.
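/// For example, with a 16-byte target stack alignment a frame size of 52
/// bytes is rounded up to 64.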
2876 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
2877                                      unsigned NumBytes) {
2878   unsigned TargetAlign = Lowering->getStackAlignment();
2879   unsigned AlignMask = TargetAlign - 1;
2880   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2881   return NumBytes;
2882 }
2883 
2884 SDValue PPCTargetLowering::LowerFormalArguments(
2885     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2886     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2887     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2888   if (Subtarget.isSVR4ABI()) {
2889     if (Subtarget.isPPC64())
2890       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
2891                                          dl, DAG, InVals);
2892     else
2893       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
2894                                          dl, DAG, InVals);
2895   } else {
2896     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
2897                                        dl, DAG, InVals);
2898   }
2899 }
2900 
2901 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
2902     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2903     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2904     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2905 
2906   // 32-bit SVR4 ABI Stack Frame Layout:
2907   //              +-----------------------------------+
2908   //        +-->  |            Back chain             |
2909   //        |     +-----------------------------------+
2910   //        |     | Floating-point register save area |
2911   //        |     +-----------------------------------+
2912   //        |     |    General register save area     |
2913   //        |     +-----------------------------------+
2914   //        |     |          CR save word             |
2915   //        |     +-----------------------------------+
2916   //        |     |         VRSAVE save word          |
2917   //        |     +-----------------------------------+
2918   //        |     |         Alignment padding         |
2919   //        |     +-----------------------------------+
2920   //        |     |     Vector register save area     |
2921   //        |     +-----------------------------------+
2922   //        |     |       Local variable space        |
2923   //        |     +-----------------------------------+
2924   //        |     |        Parameter list area        |
2925   //        |     +-----------------------------------+
2926   //        |     |           LR save word            |
2927   //        |     +-----------------------------------+
2928   // SP-->  +---  |            Back chain             |
2929   //              +-----------------------------------+
2930   //
2931   // Specifications:
2932   //   System V Application Binary Interface PowerPC Processor Supplement
2933   //   AltiVec Technology Programming Interface Manual
2934 
2935   MachineFunction &MF = DAG.getMachineFunction();
2936   MachineFrameInfo &MFI = MF.getFrameInfo();
2937   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2938 
2939   EVT PtrVT = getPointerTy(MF.getDataLayout());
2940   // Potential tail calls could cause overwriting of argument stack slots.
2941   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2942                        (CallConv == CallingConv::Fast));
2943   unsigned PtrByteSize = 4;
2944 
2945   // Assign locations to all of the incoming arguments.
2946   SmallVector<CCValAssign, 16> ArgLocs;
2947   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2948                  *DAG.getContext());
2949 
2950   // Reserve space for the linkage area on the stack.
2951   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
2952   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
2953   if (useSoftFloat())
2954     CCInfo.PreAnalyzeFormalArguments(Ins);
2955 
2956   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
2957   CCInfo.clearWasPPCF128();
2958 
2959   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2960     CCValAssign &VA = ArgLocs[i];
2961 
2962     // Arguments stored in registers.
2963     if (VA.isRegLoc()) {
2964       const TargetRegisterClass *RC;
2965       EVT ValVT = VA.getValVT();
2966 
2967       switch (ValVT.getSimpleVT().SimpleTy) {
2968         default:
2969           llvm_unreachable("ValVT not supported by formal arguments Lowering");
2970         case MVT::i1:
2971         case MVT::i32:
2972           RC = &PPC::GPRCRegClass;
2973           break;
2974         case MVT::f32:
2975           if (Subtarget.hasP8Vector())
2976             RC = &PPC::VSSRCRegClass;
2977           else
2978             RC = &PPC::F4RCRegClass;
2979           break;
2980         case MVT::f64:
2981           if (Subtarget.hasVSX())
2982             RC = &PPC::VSFRCRegClass;
2983           else
2984             RC = &PPC::F8RCRegClass;
2985           break;
2986         case MVT::v16i8:
2987         case MVT::v8i16:
2988         case MVT::v4i32:
2989           RC = &PPC::VRRCRegClass;
2990           break;
2991         case MVT::v4f32:
2992           RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
2993           break;
2994         case MVT::v2f64:
2995         case MVT::v2i64:
2996           RC = &PPC::VRRCRegClass;
2997           break;
2998         case MVT::v4f64:
2999           RC = &PPC::QFRCRegClass;
3000           break;
3001         case MVT::v4i1:
3002           RC = &PPC::QBRCRegClass;
3003           break;
3004       }
3005 
3006       // Transform the arguments stored in physical registers into virtual ones.
3007       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3008       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3009                                             ValVT == MVT::i1 ? MVT::i32 : ValVT);
3010 
3011       if (ValVT == MVT::i1)
3012         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3013 
3014       InVals.push_back(ArgValue);
3015     } else {
3016       // Argument stored in memory.
3017       assert(VA.isMemLoc());
3018 
3019       unsigned ArgSize = VA.getLocVT().getStoreSize();
3020       int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
3021                                      isImmutable);
3022 
3023       // Create load nodes to retrieve arguments from the stack.
3024       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3025       InVals.push_back(
3026           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3027     }
3028   }
3029 
3030   // Assign locations to all of the incoming aggregate by value arguments.
3031   // Aggregates passed by value are stored in the local variable space of the
3032   // caller's stack frame, right above the parameter list area.
3033   SmallVector<CCValAssign, 16> ByValArgLocs;
3034   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3035                       ByValArgLocs, *DAG.getContext());
3036 
3037   // Reserve stack space for the allocations in CCInfo.
3038   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3039 
3040   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3041 
3042   // Area that is at least reserved in the caller of this function.
3043   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3044   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3045 
3046   // Set the size that is at least reserved in the caller of this function.  Tail
3047   // call optimized function's reserved stack space needs to be aligned so that
3048   // taking the difference between two stack areas will result in an aligned
3049   // stack.
3050   MinReservedArea =
3051       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3052   FuncInfo->setMinReservedArea(MinReservedArea);
3053 
3054   SmallVector<SDValue, 8> MemOps;
3055 
3056   // If the function takes variable number of arguments, make a frame index for
3057   // the start of the first vararg value... for expansion of llvm.va_start.
3058   if (isVarArg) {
3059     static const MCPhysReg GPArgRegs[] = {
3060       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3061       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3062     };
3063     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3064 
3065     static const MCPhysReg FPArgRegs[] = {
3066       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3067       PPC::F8
3068     };
3069     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3070 
3071     if (useSoftFloat())
3072        NumFPArgRegs = 0;
3073 
3074     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3075     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3076 
3077     // Make room for NumGPArgRegs and NumFPArgRegs.
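    // (With all 8 GPRs and 8 FPRs this is 8*4 + 8*8 = 96 bytes; under
    // soft-float no FPRs are saved and only 32 bytes are needed.)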
3078     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3079                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3080 
3081     FuncInfo->setVarArgsStackOffset(
3082       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3083                             CCInfo.getNextStackOffset(), true));
3084 
3085     FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3086     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3087 
3088     // The fixed integer arguments of a variadic function are stored to the
3089     // VarArgsFrameIndex on the stack so that they may be loaded by
3090     // dereferencing the result of va_next.
3091     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3092       // Get an existing live-in vreg, or add a new one.
3093       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3094       if (!VReg)
3095         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3096 
3097       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3098       SDValue Store =
3099           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3100       MemOps.push_back(Store);
3101       // Increment the address by four for the next argument to store
3102       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3103       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3104     }
3105 
3106     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3107     // is set.
3108     // The double arguments are stored to the VarArgsFrameIndex
3109     // on the stack.
3110     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3111       // Get an existing live-in vreg, or add a new one.
3112       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3113       if (!VReg)
3114         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3115 
3116       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3117       SDValue Store =
3118           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3119       MemOps.push_back(Store);
3120       // Increment the address by eight for the next argument to store
3121       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3122                                          PtrVT);
3123       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3124     }
3125   }
3126 
3127   if (!MemOps.empty())
3128     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3129 
3130   return Chain;
3131 }
3132 
3133 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3134 // value to MVT::i64 and then truncate to the correct register size.
3135 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3136                                              EVT ObjectVT, SelectionDAG &DAG,
3137                                              SDValue ArgVal,
3138                                              const SDLoc &dl) const {
3139   if (Flags.isSExt())
3140     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3141                          DAG.getValueType(ObjectVT));
3142   else if (Flags.isZExt())
3143     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3144                          DAG.getValueType(ObjectVT));
3145 
3146   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3147 }
3148 
3149 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3150     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3151     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3152     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3153   // TODO: add description of PPC stack frame format, or at least some docs.
3154   //
3155   bool isELFv2ABI = Subtarget.isELFv2ABI();
3156   bool isLittleEndian = Subtarget.isLittleEndian();
3157   MachineFunction &MF = DAG.getMachineFunction();
3158   MachineFrameInfo &MFI = MF.getFrameInfo();
3159   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3160 
3161   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3162          "fastcc not supported on varargs functions");
3163 
3164   EVT PtrVT = getPointerTy(MF.getDataLayout());
3165   // Potential tail calls could cause overwriting of argument stack slots.
3166   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3167                        (CallConv == CallingConv::Fast));
3168   unsigned PtrByteSize = 8;
3169   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3170 
3171   static const MCPhysReg GPR[] = {
3172     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3173     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3174   };
3175   static const MCPhysReg VR[] = {
3176     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3177     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3178   };
3179 
3180   const unsigned Num_GPR_Regs = array_lengthof(GPR);
3181   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3182   const unsigned Num_VR_Regs  = array_lengthof(VR);
3183   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3184 
3185   // Do a first pass over the arguments to determine whether the ABI
3186   // guarantees that our caller has allocated the parameter save area
3187   // on its stack frame.  In the ELFv1 ABI, this is always the case;
3188   // in the ELFv2 ABI, it is true if this is a vararg function or if
3189   // any parameter is located in a stack slot.
3190 
3191   bool HasParameterArea = !isELFv2ABI || isVarArg;
3192   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3193   unsigned NumBytes = LinkageSize;
3194   unsigned AvailableFPRs = Num_FPR_Regs;
3195   unsigned AvailableVRs = Num_VR_Regs;
3196   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3197     if (Ins[i].Flags.isNest())
3198       continue;
3199 
3200     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3201                                PtrByteSize, LinkageSize, ParamAreaSize,
3202                                NumBytes, AvailableFPRs, AvailableVRs,
3203                                Subtarget.hasQPX()))
3204       HasParameterArea = true;
3205   }
3206 
3207   // Add DAG nodes to load the arguments or copy them out of registers.  On
3208   // entry to a function on PPC, the arguments start after the linkage area,
3209   // although the first ones are often in registers.
3210 
3211   unsigned ArgOffset = LinkageSize;
3212   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3213   unsigned &QFPR_idx = FPR_idx;
3214   SmallVector<SDValue, 8> MemOps;
3215   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3216   unsigned CurArgIdx = 0;
3217   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3218     SDValue ArgVal;
3219     bool needsLoad = false;
3220     EVT ObjectVT = Ins[ArgNo].VT;
3221     EVT OrigVT = Ins[ArgNo].ArgVT;
3222     unsigned ObjSize = ObjectVT.getStoreSize();
3223     unsigned ArgSize = ObjSize;
3224     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3225     if (Ins[ArgNo].isOrigArg()) {
3226       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3227       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3228     }
3229     // We re-align the argument offset for each argument, except under the fast
3230     // calling convention, where we only do so once we know the argument will
3231     // actually use a stack slot.
3232     unsigned CurArgOffset, Align;
3233     auto ComputeArgOffset = [&]() {
3234       /* Respect alignment of argument on the stack.  */
3235       Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3236       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3237       CurArgOffset = ArgOffset;
3238     };
3239 
3240     if (CallConv != CallingConv::Fast) {
3241       ComputeArgOffset();
3242 
3243       /* Compute GPR index associated with argument offset.  */
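      /* (e.g. an offset 16 bytes past the linkage area maps to GPR index 2,
         i.e. register X5).  */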
3244       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3245       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3246     }
3247 
3248     // FIXME the codegen can be much improved in some cases.
3249     // We do not have to keep everything in memory.
3250     if (Flags.isByVal()) {
3251       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3252 
3253       if (CallConv == CallingConv::Fast)
3254         ComputeArgOffset();
3255 
3256       // ObjSize is the true size; ArgSize is it rounded up to a register multiple.
3257       ObjSize = Flags.getByValSize();
3258       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3259       // Empty aggregate parameters do not take up registers.  Examples:
3260       //   struct { } a;
3261       //   union  { } b;
3262       //   int c[0];
3263       // etc.  However, we have to provide a place-holder in InVals, so
3264       // pretend we have an 8-byte item at the current address for that
3265       // purpose.
3266       if (!ObjSize) {
3267         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3268         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3269         InVals.push_back(FIN);
3270         continue;
3271       }
3272 
3273       // Create a stack object covering all stack doublewords occupied
3274       // by the argument.  If the argument is (fully or partially) on
3275       // the stack, or if the argument is fully in registers but the
3276       // caller has allocated the parameter save anyway, we can refer
3277       // directly to the caller's stack frame.  Otherwise, create a
3278       // local copy in our own frame.
3279       int FI;
3280       if (HasParameterArea ||
3281           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3282         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3283       else
3284         FI = MFI.CreateStackObject(ArgSize, Align, false);
3285       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3286 
3287       // Handle aggregates smaller than 8 bytes.
3288       if (ObjSize < PtrByteSize) {
3289         // The value of the object is its address, which differs from the
3290         // address of the enclosing doubleword on big-endian systems.
3291         SDValue Arg = FIN;
3292         if (!isLittleEndian) {
3293           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3294           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3295         }
3296         InVals.push_back(Arg);
3297 
3298         if (GPR_idx != Num_GPR_Regs) {
3299           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3300           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3301           SDValue Store;
3302 
3303           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3304             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3305                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
3306             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3307                                       MachinePointerInfo(&*FuncArg), ObjType);
3308           } else {
3309             // For sizes that don't fit a truncating store (3, 5, 6, 7),
3310             // store the whole register as-is to the parameter save area
3311             // slot.
3312             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3313                                  MachinePointerInfo(&*FuncArg));
3314           }
3315 
3316           MemOps.push_back(Store);
3317         }
3318         // Whether we copied from a register or not, advance the offset
3319         // into the parameter save area by a full doubleword.
3320         ArgOffset += PtrByteSize;
3321         continue;
3322       }
3323 
3324       // The value of the object is its address, which is the address of
3325       // its first stack doubleword.
3326       InVals.push_back(FIN);
3327 
3328       // Store whatever pieces of the object are in registers to memory.
3329       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3330         if (GPR_idx == Num_GPR_Regs)
3331           break;
3332 
3333         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3334         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3335         SDValue Addr = FIN;
3336         if (j) {
3337           SDValue Off = DAG.getConstant(j, dl, PtrVT);
3338           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3339         }
3340         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3341                                      MachinePointerInfo(&*FuncArg, j));
3342         MemOps.push_back(Store);
3343         ++GPR_idx;
3344       }
3345       ArgOffset += ArgSize;
3346       continue;
3347     }
3348 
3349     switch (ObjectVT.getSimpleVT().SimpleTy) {
3350     default: llvm_unreachable("Unhandled argument type!");
3351     case MVT::i1:
3352     case MVT::i32:
3353     case MVT::i64:
3354       if (Flags.isNest()) {
3355         // The 'nest' parameter, if any, is passed in R11.
3356         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3357         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3358 
3359         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3360           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3361 
3362         break;
3363       }
3364 
3365       // These can be scalar arguments or elements of an integer array type
3366       // passed directly.  Clang may use those instead of "byval" aggregate
3367       // types to avoid forcing arguments to memory unnecessarily.
3368       if (GPR_idx != Num_GPR_Regs) {
3369         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3370         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3371 
3372         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3373           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3374           // value to MVT::i64 and then truncate to the correct register size.
3375           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3376       } else {
3377         if (CallConv == CallingConv::Fast)
3378           ComputeArgOffset();
3379 
3380         needsLoad = true;
3381         ArgSize = PtrByteSize;
3382       }
3383       if (CallConv != CallingConv::Fast || needsLoad)
3384         ArgOffset += 8;
3385       break;
3386 
3387     case MVT::f32:
3388     case MVT::f64:
3389       // These can be scalar arguments or elements of a float array type
3390       // passed directly.  The latter are used to implement ELFv2 homogenous
3391       // float aggregates.
3392       if (FPR_idx != Num_FPR_Regs) {
3393         unsigned VReg;
3394 
3395         if (ObjectVT == MVT::f32)
3396           VReg = MF.addLiveIn(FPR[FPR_idx],
3397                               Subtarget.hasP8Vector()
3398                                   ? &PPC::VSSRCRegClass
3399                                   : &PPC::F4RCRegClass);
3400         else
3401           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3402                                                 ? &PPC::VSFRCRegClass
3403                                                 : &PPC::F8RCRegClass);
3404 
3405         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3406         ++FPR_idx;
3407       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3408         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3409         // once we support fp <-> gpr moves.
3410 
3411         // This can only ever happen in the presence of f32 array types,
3412         // since otherwise we never run out of FPRs before running out
3413         // of GPRs.
3414         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3415         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3416 
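        // The f32 occupies one 32-bit half of the doubleword GPR; shift it
        // down first when it sits in the most-significant word (offset 0 on
        // big-endian, offset 4 on little-endian) before truncating to i32.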
3417         if (ObjectVT == MVT::f32) {
3418           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3419             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3420                                  DAG.getConstant(32, dl, MVT::i32));
3421           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3422         }
3423 
3424         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3425       } else {
3426         if (CallConv == CallingConv::Fast)
3427           ComputeArgOffset();
3428 
3429         needsLoad = true;
3430       }
3431 
3432       // When passing an array of floats, the array occupies consecutive
3433       // space in the argument area; only round up to the next doubleword
3434       // at the end of the array.  Otherwise, each float takes 8 bytes.
3435       if (CallConv != CallingConv::Fast || needsLoad) {
3436         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3437         ArgOffset += ArgSize;
3438         if (Flags.isInConsecutiveRegsLast())
3439           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3440       }
3441       break;
3442     case MVT::v4f32:
3443     case MVT::v4i32:
3444     case MVT::v8i16:
3445     case MVT::v16i8:
3446     case MVT::v2f64:
3447     case MVT::v2i64:
3448     case MVT::v1i128:
3449       if (!Subtarget.hasQPX()) {
3450       // These can be scalar arguments or elements of a vector array type
3451       // passed directly.  The latter are used to implement ELFv2 homogenous
3452       // vector aggregates.
3453       if (VR_idx != Num_VR_Regs) {
3454         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3455         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3456         ++VR_idx;
3457       } else {
3458         if (CallConv == CallingConv::Fast)
3459           ComputeArgOffset();
3460 
3461         needsLoad = true;
3462       }
3463       if (CallConv != CallingConv::Fast || needsLoad)
3464         ArgOffset += 16;
3465       break;
3466       } // not QPX
3467 
3468       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3469              "Invalid QPX parameter type");
3470       /* fall through */
3471 
3472     case MVT::v4f64:
3473     case MVT::v4i1:
3474       // QPX vectors are treated like their scalar floating-point subregisters
3475       // (except that they're larger).
3476       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3477       if (QFPR_idx != Num_QFPR_Regs) {
3478         const TargetRegisterClass *RC;
3479         switch (ObjectVT.getSimpleVT().SimpleTy) {
3480         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3481         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3482         default:         RC = &PPC::QBRCRegClass; break;
3483         }
3484 
3485         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3486         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3487         ++QFPR_idx;
3488       } else {
3489         if (CallConv == CallingConv::Fast)
3490           ComputeArgOffset();
3491         needsLoad = true;
3492       }
3493       if (CallConv != CallingConv::Fast || needsLoad)
3494         ArgOffset += Sz;
3495       break;
3496     }
3497 
3498     // We need to load the argument to a virtual register if we determined
3499     // above that we ran out of physical registers of the appropriate type.
3500     if (needsLoad) {
3501       if (ObjSize < ArgSize && !isLittleEndian)
3502         CurArgOffset += ArgSize - ObjSize;
3503       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3504       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3505       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3506     }
3507 
3508     InVals.push_back(ArgVal);
3509   }
3510 
3511   // Area that is at least reserved in the caller of this function.
3512   unsigned MinReservedArea;
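  // Whenever a parameter save area is present, reserve at least the full
  // 8 doublewords (LinkageSize + 8 * PtrByteSize) for it.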
3513   if (HasParameterArea)
3514     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3515   else
3516     MinReservedArea = LinkageSize;
3517 
3518   // Set the size that is at least reserved in the caller of this function.  Tail
3519   // call optimized functions' reserved stack space needs to be aligned so that
3520   // taking the difference between two stack areas will result in an aligned
3521   // stack.
3522   MinReservedArea =
3523       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3524   FuncInfo->setMinReservedArea(MinReservedArea);
3525 
3526   // If the function takes variable number of arguments, make a frame index for
3527   // the start of the first vararg value... for expansion of llvm.va_start.
3528   if (isVarArg) {
3529     int Depth = ArgOffset;
3530 
3531     FuncInfo->setVarArgsFrameIndex(
3532       MFI.CreateFixedObject(PtrByteSize, Depth, true));
3533     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3534 
3535     // If this function is vararg, store any remaining integer argument regs
3536     // to their spots on the stack so that they may be loaded by dereferencing
3537     // the result of va_next.
3538     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3539          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3540       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3541       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3542       SDValue Store =
3543           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3544       MemOps.push_back(Store);
3545       // Increment the address by four for the next argument to store
3546       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
3547       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3548     }
3549   }
3550 
3551   if (!MemOps.empty())
3552     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3553 
3554   return Chain;
3555 }
3556 
3557 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
3558     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3559     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3560     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3561   // TODO: add description of PPC stack frame format, or at least some docs.
3562   //
3563   MachineFunction &MF = DAG.getMachineFunction();
3564   MachineFrameInfo &MFI = MF.getFrameInfo();
3565   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3566 
3567   EVT PtrVT = getPointerTy(MF.getDataLayout());
3568   bool isPPC64 = PtrVT == MVT::i64;
3569   // Potential tail calls could cause overwriting of argument stack slots.
3570   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3571                        (CallConv == CallingConv::Fast));
3572   unsigned PtrByteSize = isPPC64 ? 8 : 4;
3573   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3574   unsigned ArgOffset = LinkageSize;
3575   // Area that is at least reserved in caller of this function.
3576   unsigned MinReservedArea = ArgOffset;
3577 
3578   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
3579     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3580     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3581   };
3582   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
3583     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3584     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3585   };
3586   static const MCPhysReg VR[] = {
3587     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3588     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3589   };
3590 
3591   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
3592   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3593   const unsigned Num_VR_Regs  = array_lengthof(VR);
3594 
3595   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3596 
3597   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
3598 
3599   // In 32-bit non-varargs functions, the stack space for vectors is after the
3600   // stack space for non-vectors.  We do not use this space unless we have
3601   // too many vectors to fit in registers, something that only occurs in
3602   // constructed examples:), but we have to walk the arglist to figure
3603   // that out...for the pathological case, compute VecArgOffset as the
3604   // start of the vector parameter area.  Computing VecArgOffset is the
3605   // entire point of the following loop.
3606   unsigned VecArgOffset = ArgOffset;
3607   if (!isVarArg && !isPPC64) {
3608     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
3609          ++ArgNo) {
3610       EVT ObjectVT = Ins[ArgNo].VT;
3611       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3612 
3613       if (Flags.isByVal()) {
3614         // ObjSize is the true size; ArgSize is it rounded up to a multiple of regs.
3615         unsigned ObjSize = Flags.getByValSize();
3616         unsigned ArgSize =
3617                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3618         VecArgOffset += ArgSize;
3619         continue;
3620       }
3621 
3622       switch(ObjectVT.getSimpleVT().SimpleTy) {
3623       default: llvm_unreachable("Unhandled argument type!");
3624       case MVT::i1:
3625       case MVT::i32:
3626       case MVT::f32:
3627         VecArgOffset += 4;
3628         break;
3629       case MVT::i64:  // PPC64
3630       case MVT::f64:
3631         // FIXME: We are guaranteed to be !isPPC64 at this point.
3632         // Does MVT::i64 apply?
3633         VecArgOffset += 8;
3634         break;
3635       case MVT::v4f32:
3636       case MVT::v4i32:
3637       case MVT::v8i16:
3638       case MVT::v16i8:
3639         // Nothing to do, we're only looking at non-vector args here.
3640         break;
3641       }
3642     }
3643   }
3644   // We've found where the vector parameter area in memory is.  Skip the
3645   // first 12 parameters; these don't use that memory.
3646   VecArgOffset = ((VecArgOffset+15)/16)*16;
3647   VecArgOffset += 12*16;
3648 
3649   // Add DAG nodes to load the arguments or copy them out of registers.  On
3650   // entry to a function on PPC, the arguments start after the linkage area,
3651   // although the first ones are often in registers.
3652 
3653   SmallVector<SDValue, 8> MemOps;
3654   unsigned nAltivecParamsAtEnd = 0;
3655   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3656   unsigned CurArgIdx = 0;
3657   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3658     SDValue ArgVal;
3659     bool needsLoad = false;
3660     EVT ObjectVT = Ins[ArgNo].VT;
3661     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
3662     unsigned ArgSize = ObjSize;
3663     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3664     if (Ins[ArgNo].isOrigArg()) {
3665       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3666       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3667     }
3668     unsigned CurArgOffset = ArgOffset;
3669 
3670     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
3671     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3672         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3673       if (isVarArg || isPPC64) {
3674         MinReservedArea = ((MinReservedArea+15)/16)*16;
3675         MinReservedArea += CalculateStackSlotSize(ObjectVT,
3676                                                   Flags,
3677                                                   PtrByteSize);
3678       } else  nAltivecParamsAtEnd++;
3679     } else
3680       // Calculate min reserved area.
3681       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
3682                                                 Flags,
3683                                                 PtrByteSize);
3684 
3685     // FIXME the codegen can be much improved in some cases.
3686     // We do not have to keep everything in memory.
3687     if (Flags.isByVal()) {
3688       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3689 
3690       // ObjSize is the true size; ArgSize is it rounded up to a register multiple.
3691       ObjSize = Flags.getByValSize();
3692       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3693       // Objects of size 1 and 2 are right justified, everything else is
3694       // left justified.  This means the memory address is adjusted forwards.
3695       if (ObjSize==1 || ObjSize==2) {
3696         CurArgOffset = CurArgOffset + (4 - ObjSize);
3697       }
3698       // The value of the object is its address.
3699       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
3700       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3701       InVals.push_back(FIN);
3702       if (ObjSize==1 || ObjSize==2) {
3703         if (GPR_idx != Num_GPR_Regs) {
3704           unsigned VReg;
3705           if (isPPC64)
3706             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3707           else
3708             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3709           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3710           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
3711           SDValue Store =
3712               DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
3713                                 MachinePointerInfo(&*FuncArg), ObjType);
3714           MemOps.push_back(Store);
3715           ++GPR_idx;
3716         }
3717 
3718         ArgOffset += PtrByteSize;
3719 
3720         continue;
3721       }
3722       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3723         // Store whatever pieces of the object are in registers
3724         // to memory.  ArgOffset will be the address of the beginning
3725         // of the object.
3726         if (GPR_idx != Num_GPR_Regs) {
3727           unsigned VReg;
3728           if (isPPC64)
3729             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3730           else
3731             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3732           int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3733           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3734           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3735           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3736                                        MachinePointerInfo(&*FuncArg, j));
3737           MemOps.push_back(Store);
3738           ++GPR_idx;
3739           ArgOffset += PtrByteSize;
3740         } else {
3741           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
3742           break;
3743         }
3744       }
3745       continue;
3746     }
3747 
3748     switch (ObjectVT.getSimpleVT().SimpleTy) {
3749     default: llvm_unreachable("Unhandled argument type!");
3750     case MVT::i1:
3751     case MVT::i32:
3752       if (!isPPC64) {
3753         if (GPR_idx != Num_GPR_Regs) {
3754           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3755           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3756 
3757           if (ObjectVT == MVT::i1)
3758             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
3759 
3760           ++GPR_idx;
3761         } else {
3762           needsLoad = true;
3763           ArgSize = PtrByteSize;
3764         }
3765         // All int arguments reserve stack space in the Darwin ABI.
3766         ArgOffset += PtrByteSize;
3767         break;
3768       }
3769       LLVM_FALLTHROUGH;
3770     case MVT::i64:  // PPC64
3771       if (GPR_idx != Num_GPR_Regs) {
3772         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3773         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3774 
3775         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3776           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3777           // value to MVT::i64 and then truncate to the correct register size.
3778           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3779 
3780         ++GPR_idx;
3781       } else {
3782         needsLoad = true;
3783         ArgSize = PtrByteSize;
3784       }
3785       // All int arguments reserve stack space in the Darwin ABI.
3786       ArgOffset += 8;
3787       break;
3788 
3789     case MVT::f32:
3790     case MVT::f64:
3791       // Every 4 bytes of argument space consumes one of the GPRs available for
3792       // argument passing.
3793       if (GPR_idx != Num_GPR_Regs) {
3794         ++GPR_idx;
3795         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
3796           ++GPR_idx;
3797       }
3798       if (FPR_idx != Num_FPR_Regs) {
3799         unsigned VReg;
3800 
3801         if (ObjectVT == MVT::f32)
3802           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
3803         else
3804           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
3805 
3806         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3807         ++FPR_idx;
3808       } else {
3809         needsLoad = true;
3810       }
3811 
3812       // All FP arguments reserve stack space in the Darwin ABI.
3813       ArgOffset += isPPC64 ? 8 : ObjSize;
3814       break;
3815     case MVT::v4f32:
3816     case MVT::v4i32:
3817     case MVT::v8i16:
3818     case MVT::v16i8:
3819       // Note that vector arguments in registers don't reserve stack space,
3820       // except in varargs functions.
3821       if (VR_idx != Num_VR_Regs) {
3822         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3823         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3824         if (isVarArg) {
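          // Consume GPRs (and the matching stack space) until the vector's
          // 16-byte aligned slot is reached.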
3825           while ((ArgOffset % 16) != 0) {
3826             ArgOffset += PtrByteSize;
3827             if (GPR_idx != Num_GPR_Regs)
3828               GPR_idx++;
3829           }
3830           ArgOffset += 16;
3831           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
3832         }
3833         ++VR_idx;
3834       } else {
3835         if (!isVarArg && !isPPC64) {
3836           // Vectors go after all the nonvectors.
3837           CurArgOffset = VecArgOffset;
3838           VecArgOffset += 16;
3839         } else {
3840           // Vectors are aligned.
3841           ArgOffset = ((ArgOffset+15)/16)*16;
3842           CurArgOffset = ArgOffset;
3843           ArgOffset += 16;
3844         }
3845         needsLoad = true;
3846       }
3847       break;
3848     }
3849 
3850     // We need to load the argument to a virtual register if we determined above
3851     // that we ran out of physical registers of the appropriate type.
3852     if (needsLoad) {
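      // Arguments smaller than their reserved slot are right-justified within
      // it (big-endian), so point the frame object at the start of the value.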
3853       int FI = MFI.CreateFixedObject(ObjSize,
3854                                      CurArgOffset + (ArgSize - ObjSize),
3855                                      isImmutable);
3856       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3857       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3858     }
3859 
3860     InVals.push_back(ArgVal);
3861   }
3862 
3863   // Allow for Altivec parameters at the end, if needed.
3864   if (nAltivecParamsAtEnd) {
3865     MinReservedArea = ((MinReservedArea+15)/16)*16;
3866     MinReservedArea += 16*nAltivecParamsAtEnd;
3867   }
3868 
3869   // Area that is at least reserved in the caller of this function.
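  // The caller always reserves at least the linkage area plus eight
  // pointer-sized parameter save slots, even if fewer arguments are passed.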
3870   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
3871 
3872   // Set the size that is at least reserved in caller of this function.  Tail
3873   // call optimized functions' reserved stack space needs to be aligned so that
3874   // taking the difference between two stack areas will result in an aligned
3875   // stack.
3876   MinReservedArea =
3877       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3878   FuncInfo->setMinReservedArea(MinReservedArea);
3879 
3880   // If the function takes a variable number of arguments, make a frame index
3881   // for the start of the first vararg value... for expansion of llvm.va_start.
3882   if (isVarArg) {
3883     int Depth = ArgOffset;
3884 
3885     FuncInfo->setVarArgsFrameIndex(
3886       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3887                             Depth, true));
3888     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3889 
3890     // If this function is vararg, store any remaining integer argument regs
3891     // to their spots on the stack so that they may be loaded by dereferencing
3892     // the result of va_next.
3893     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3894       unsigned VReg;
3895 
3896       if (isPPC64)
3897         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3898       else
3899         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3900 
3901       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3902       SDValue Store =
3903           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3904       MemOps.push_back(Store);
3905       // Increment the address by the pointer size for the next argument to store
3906       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3907       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3908     }
3909   }
3910 
3911   if (!MemOps.empty())
3912     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3913 
3914   return Chain;
3915 }
3916 
3917 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
3918 /// adjusted to accommodate the arguments for the tailcall.
3919 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
3920                                    unsigned ParamSize) {
3921 
3922   if (!isTailCall) return 0;
3923 
3924   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3925   unsigned CallerMinReservedArea = FI->getMinReservedArea();
3926   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
3927   // Remember only if the new adjustment is bigger.
3928   if (SPDiff < FI->getTailCallSPDelta())
3929     FI->setTailCallSPDelta(SPDiff);
3930 
3931   return SPDiff;
3932 }
3933 
3934 static bool isFunctionGlobalAddress(SDValue Callee);
3935 
3936 static bool
3937 resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
3938   // If !G, Callee can be an external symbol.
3939   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3940   if (!G) return false;
3941 
3942   const GlobalValue *GV = G->getGlobal();
3943 
3944   if (GV->isDeclaration()) return false;
3945 
3946   switch(GV->getLinkage()) {
3947   default: llvm_unreachable("unknown linkage type");
3948   case GlobalValue::AvailableExternallyLinkage:
3949   case GlobalValue::ExternalWeakLinkage:
3950     return false;
3951 
3952   // A callee with weak linkage is allowed if it has hidden or protected
3953   // visibility.
3954   case GlobalValue::LinkOnceAnyLinkage:
3955   case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
3956   case GlobalValue::WeakAnyLinkage:
3957   case GlobalValue::WeakODRLinkage:     // e.g. c++ template instantiation
3958     if (GV->hasDefaultVisibility())
3959       return false;
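    // Otherwise (hidden or protected visibility) fall through and treat the
    // callee as local.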
3960 
3961   case GlobalValue::ExternalLinkage:
3962   case GlobalValue::InternalLinkage:
3963   case GlobalValue::PrivateLinkage:
3964     break;
3965   }
3966 
3967   // With '-fPIC', calling a default-visibility function requires inserting a
3968   // 'nop' after the call, regardless of whether the callee resides in the same
3969   // module or not, so we treat it as being in a different module.
3970   if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
3971     return false;
3972 
3973   return true;
3974 }
3975 
3976 static bool
3977 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
3978                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
3979   assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
3980 
3981   const unsigned PtrByteSize = 8;
3982   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3983 
3984   static const MCPhysReg GPR[] = {
3985     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3986     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3987   };
3988   static const MCPhysReg VR[] = {
3989     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3990     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3991   };
3992 
3993   const unsigned NumGPRs = array_lengthof(GPR);
3994   const unsigned NumFPRs = 13;
3995   const unsigned NumVRs = array_lengthof(VR);
3996   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
3997 
3998   unsigned NumBytes = LinkageSize;
3999   unsigned AvailableFPRs = NumFPRs;
4000   unsigned AvailableVRs = NumVRs;
4001 
4002   for (const ISD::OutputArg& Param : Outs) {
4003     if (Param.Flags.isNest()) continue;
4004 
4005     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4006                                PtrByteSize, LinkageSize, ParamAreaSize,
4007                                NumBytes, AvailableFPRs, AvailableVRs,
4008                                Subtarget.hasQPX()))
4009       return true;
4010   }
4011   return false;
4012 }
4013 
4014 static bool
4015 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
4016   if (CS->arg_size() != CallerFn->getArgumentList().size())
4017     return false;
4018 
4019   ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
4020   ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
4021   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4022 
4023   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4024     const Value* CalleeArg = *CalleeArgIter;
4025     const Value* CallerArg = &(*CallerArgIter);
4026     if (CalleeArg == CallerArg)
4027       continue;
4028 
4029     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4030     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4031     //      }
4032     // 1st argument of callee is undef and has the same type as caller.
4033     if (CalleeArg->getType() == CallerArg->getType() &&
4034         isa<UndefValue>(CalleeArg))
4035       continue;
4036 
4037     return false;
4038   }
4039 
4040   return true;
4041 }
4042 
4043 bool
4044 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4045                                     SDValue Callee,
4046                                     CallingConv::ID CalleeCC,
4047                                     ImmutableCallSite *CS,
4048                                     bool isVarArg,
4049                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
4050                                     const SmallVectorImpl<ISD::InputArg> &Ins,
4051                                     SelectionDAG& DAG) const {
4052   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4053 
4054   if (DisableSCO && !TailCallOpt) return false;
4055 
4056   // Variadic argument functions are not supported.
4057   if (isVarArg) return false;
4058 
4059   MachineFunction &MF = DAG.getMachineFunction();
4060   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4061 
4062   // Tail or sibling call optimization (TCO/SCO) requires that callee and
4063   // caller have the same calling convention.
4064   if (CallerCC != CalleeCC) return false;
4065 
4066   // Only the C and Fast calling conventions are supported for TCO/SCO.
4067   if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
4068     return false;
4069 
4070   // A caller with any byval parameter is not supported.
4071   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4072     return false;
4073 
4074   // A callee with any byval parameter is not supported either.
4075   // Note: This is a quick workaround, because in some cases, e.g. when the
4076   // caller's stack size > the callee's stack size, we are still able to apply
4077   // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
4078   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4079     return false;
4080 
4081   // No TCO/SCO on indirect calls because the caller has to restore its TOC.
4082   if (!isFunctionGlobalAddress(Callee) &&
4083       !isa<ExternalSymbolSDNode>(Callee))
4084     return false;
4085 
4086   // Check if the callee resides in the same module, because for now the PPC64
4087   // SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides
4088   // in another module.
4089   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4090   if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
4091     return false;
4092 
4093   // TCO allows altering callee ABI, so we don't have to check further.
4094   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4095     return true;
4096 
4097   if (DisableSCO) return false;
4098 
4099   // If the callee uses the same argument list as the caller, we can apply SCO
4100   // in this case. Otherwise, we need to check whether the callee needs stack
4101   // space for passing arguments.
4102   if (!hasSameArgumentList(MF.getFunction(), CS) &&
4103       needStackSlotPassParameters(Subtarget, Outs)) {
4104     return false;
4105   }
4106 
4107   return true;
4108 }
4109 
4110 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4111 /// for tail call optimization. Targets which want to do tail call
4112 /// optimization should implement this function.
4113 bool
4114 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4115                                                      CallingConv::ID CalleeCC,
4116                                                      bool isVarArg,
4117                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4118                                                      SelectionDAG& DAG) const {
4119   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4120     return false;
4121 
4122   // Variable argument functions are not supported.
4123   if (isVarArg)
4124     return false;
4125 
4126   MachineFunction &MF = DAG.getMachineFunction();
4127   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4128   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4129     // Functions containing byval parameters are not supported.
4130     for (unsigned i = 0; i != Ins.size(); i++) {
4131        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4132        if (Flags.isByVal()) return false;
4133     }
4134 
4135     // Non-PIC/GOT tail calls are supported.
4136     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4137       return true;
4138 
4139     // At the moment we can only do local tail calls (in same module, hidden
4140     // or protected) if we are generating PIC.
4141     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4142       return G->getGlobal()->hasHiddenVisibility()
4143           || G->getGlobal()->hasProtectedVisibility();
4144   }
4145 
4146   return false;
4147 }
4148 
4149 /// isBLACompatibleAddress - Return the immediate to use if the specified
4150 /// 32-bit value is representable in the immediate field of a BxA instruction.
4151 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4152   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4153   if (!C) return nullptr;
4154 
4155   int Addr = C->getZExtValue();
4156   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4157       SignExtend32<26>(Addr) != Addr)
4158     return nullptr;  // Top 6 bits have to be sext of immediate.
4159 
4160   return DAG
4161       .getConstant(
4162           (int)C->getZExtValue() >> 2, SDLoc(Op),
4163           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4164       .getNode();
4165 }
4166 
4167 namespace {
4168 
4169 struct TailCallArgumentInfo {
4170   SDValue Arg;
4171   SDValue FrameIdxOp;
4172   int       FrameIdx;
4173 
4174   TailCallArgumentInfo() : FrameIdx(0) {}
4175 };
4176 }
4177 
4178 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4179 static void StoreTailCallArgumentsToStackSlot(
4180     SelectionDAG &DAG, SDValue Chain,
4181     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4182     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4183   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4184     SDValue Arg = TailCallArgs[i].Arg;
4185     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4186     int FI = TailCallArgs[i].FrameIdx;
4187     // Store relative to framepointer.
4188     MemOpChains.push_back(DAG.getStore(
4189         Chain, dl, Arg, FIN,
4190         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4191   }
4192 }
4193 
4194 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4195 /// the appropriate stack slot for the tail call optimized function call.
4196 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4197                                              SDValue OldRetAddr, SDValue OldFP,
4198                                              int SPDiff, const SDLoc &dl) {
4199   if (SPDiff) {
4200     // Calculate the new stack slot for the return address.
4201     MachineFunction &MF = DAG.getMachineFunction();
4202     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4203     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4204     bool isPPC64 = Subtarget.isPPC64();
4205     int SlotSize = isPPC64 ? 8 : 4;
4206     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4207     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4208                                                          NewRetAddrLoc, true);
4209     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4210     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4211     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4212                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4213 
4214     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4215     // slot as the FP is never overwritten.
4216     if (Subtarget.isDarwinABI()) {
4217       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4218       int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4219                                                          true);
4220       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4221       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4222                            MachinePointerInfo::getFixedStack(
4223                                DAG.getMachineFunction(), NewFPIdx));
4224     }
4225   }
4226   return Chain;
4227 }
4228 
4229 /// CalculateTailCallArgDest - Remember the argument for later processing and
4230 /// calculate its position.
4231 static void
4232 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4233                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4234                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
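  // The destination of the argument is its normal offset, shifted by the stack
  // pointer adjustment needed for the tail call.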
4235   int Offset = ArgOffset + SPDiff;
4236   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4237   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4238   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4239   SDValue FIN = DAG.getFrameIndex(FI, VT);
4240   TailCallArgumentInfo Info;
4241   Info.Arg = Arg;
4242   Info.FrameIdxOp = FIN;
4243   Info.FrameIdx = FI;
4244   TailCallArguments.push_back(Info);
4245 }
4246 
4247 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
4248 /// address stack slots. Returns the chain as result and the loaded values in
4249 /// LROpOut/FPOpOut. Used when tail calling.
4250 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4251     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4252     SDValue &FPOpOut, const SDLoc &dl) const {
4253   if (SPDiff) {
4254     // Load the LR and FP stack slot for later adjusting.
4255     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4256     LROpOut = getReturnAddrFrameIndex(DAG);
4257     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4258     Chain = SDValue(LROpOut.getNode(), 1);
4259 
4260     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4261     // slot as the FP is never overwritten.
4262     if (Subtarget.isDarwinABI()) {
4263       FPOpOut = getFramePointerFrameIndex(DAG);
4264       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4265       Chain = SDValue(FPOpOut.getNode(), 1);
4266     }
4267   }
4268   return Chain;
4269 }
4270 
4271 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4272 /// by "Src" to address "Dst" of size "Size".  Alignment information is
4273 /// specified by the specific parameter attribute. The copy will be passed as
4274 /// a byval function parameter.
4275 /// Sometimes what we are copying is the end of a larger object, the part that
4276 /// does not fit in registers.
4277 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4278                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4279                                          SelectionDAG &DAG, const SDLoc &dl) {
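  // Emit a generic memcpy of ByValSize bytes; it may later be lowered to
  // inline loads/stores or an actual call to memcpy.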
4280   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4281   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4282                        false, false, false, MachinePointerInfo(),
4283                        MachinePointerInfo());
4284 }
4285 
4286 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4287 /// tail calls.
4288 static void LowerMemOpCallTo(
4289     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4290     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4291     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4292     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4293   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4294   if (!isTailCall) {
4295     if (isVector) {
4296       SDValue StackPtr;
4297       if (isPPC64)
4298         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4299       else
4300         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4301       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4302                            DAG.getConstant(ArgOffset, dl, PtrVT));
4303     }
4304     MemOpChains.push_back(
4305         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4306     // Calculate and remember argument location.
4307   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4308                                   TailCallArguments);
4309 }
4310 
4311 static void
4312 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4313                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4314                 SDValue FPOp,
4315                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4316   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4317   // might overwrite each other in case of tail call optimization.
4318   SmallVector<SDValue, 8> MemOpChains2;
4319   // Do not flag preceding copytoreg stuff together with the following stuff.
4320   InFlag = SDValue();
4321   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4322                                     MemOpChains2, dl);
4323   if (!MemOpChains2.empty())
4324     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4325 
4326   // Store the return address to the appropriate stack slot.
4327   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4328 
4329   // Emit callseq_end just before tailcall node.
4330   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4331                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4332   InFlag = Chain.getValue(1);
4333 }
4334 
4335 // Is this global address that of a function that can be called by name (as
4336 // opposed to something that must hold a descriptor for an indirect call)?
4337 static bool isFunctionGlobalAddress(SDValue Callee) {
4338   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4339     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4340         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4341       return false;
4342 
4343     return G->getGlobal()->getValueType()->isFunctionTy();
4344   }
4345 
4346   return false;
4347 }
4348 
4349 static unsigned
4350 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4351             SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4352             bool isPatchPoint, bool hasNest,
4353             SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4354             SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4355             ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
4356 
4357   bool isPPC64 = Subtarget.isPPC64();
4358   bool isSVR4ABI = Subtarget.isSVR4ABI();
4359   bool isELFv2ABI = Subtarget.isELFv2ABI();
4360 
4361   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4362   NodeTys.push_back(MVT::Other);   // Returns a chain
4363   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
4364 
4365   unsigned CallOpc = PPCISD::CALL;
4366 
4367   bool needIndirectCall = true;
4368   if (!isSVR4ABI || !isPPC64)
4369     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4370       // If this is an absolute destination address, use the munged value.
4371       Callee = SDValue(Dest, 0);
4372       needIndirectCall = false;
4373     }
4374 
4375   // PC-relative references to external symbols should go through $stub, unless
4376   // we're building with the leopard linker or later, which automatically
4377   // synthesizes these stubs.
4378   const TargetMachine &TM = DAG.getTarget();
4379   const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
4380   const GlobalValue *GV = nullptr;
4381   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4382     GV = G->getGlobal();
4383   bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
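  // On 32-bit ELF, calls to functions that are not known to be DSO-local must
  // go through the PLT.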
4384   bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4385 
4386   if (isFunctionGlobalAddress(Callee)) {
4387     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4388     // A call to a TLS address is actually an indirect call to a
4389     // thread-specific pointer.
4390     unsigned OpFlags = 0;
4391     if (UsePlt)
4392       OpFlags = PPCII::MO_PLT;
4393 
4394     // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4395     // every direct call is) turn it into a TargetGlobalAddress /
4396     // TargetExternalSymbol node so that legalize doesn't hack it.
4397     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4398                                         Callee.getValueType(), 0, OpFlags);
4399     needIndirectCall = false;
4400   }
4401 
4402   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4403     unsigned char OpFlags = 0;
4404 
4405     if (UsePlt)
4406       OpFlags = PPCII::MO_PLT;
4407 
4408     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4409                                          OpFlags);
4410     needIndirectCall = false;
4411   }
4412 
4413   if (isPatchPoint) {
4414     // We'll form an invalid direct call when lowering a patchpoint; the full
4415     // sequence for an indirect call is complicated, and many of the
4416     // instructions introduced might have side effects (and, thus, can't be
4417     // removed later). The call itself will be removed as soon as the
4418     // argument/return lowering is complete, so the fact that it has the wrong
4419     // kind of operands should not really matter.
4420     needIndirectCall = false;
4421   }
4422 
4423   if (needIndirectCall) {
4424     // Otherwise, this is an indirect call.  We have to use an MTCTR/BCTRL pair
4425     // to do the call, so we can't use PPCISD::CALL.
4426     SDValue MTCTROps[] = {Chain, Callee, InFlag};
4427 
4428     if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4429       // Function pointers in the 64-bit SVR4 ABI do not point to the function
4430       // entry point, but to the function descriptor (the function entry point
4431       // address is part of the function descriptor though).
4432       // The function descriptor is a three doubleword structure with the
4433       // following fields: function entry point, TOC base address and
4434       // environment pointer.
4435       // Thus for a call through a function pointer, the following actions need
4436       // to be performed:
4437       //   1. Save the TOC of the caller in the TOC save area of its stack
4438       //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4439       //   2. Load the address of the function entry point from the function
4440       //      descriptor.
4441       //   3. Load the TOC of the callee from the function descriptor into r2.
4442       //   4. Load the environment pointer from the function descriptor into
4443       //      r11.
4444       //   5. Branch to the function entry point address.
4445       //   6. On return of the callee, the TOC of the caller needs to be
4446       //      restored (this is done in FinishCall()).
4447       //
4448       // The loads are scheduled at the beginning of the call sequence, and the
4449       // register copies are flagged together to ensure that no other
4450       // operations can be scheduled in between. E.g. without flagging the
4451       // copies together, a TOC access in the caller could be scheduled between
4452       // the assignment of the callee TOC and the branch to the callee, which
4453       // results in the TOC access going through the TOC of the callee instead
4454       // of going through the TOC of the caller, which leads to incorrect code.
4455 
4456       // Load the address of the function entry point from the function
4457       // descriptor.
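      // Chain the descriptor loads after CALLSEQ_START, skipping over the glue
      // result if the node produces one.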
4458       SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4459       if (LDChain.getValueType() == MVT::Glue)
4460         LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4461 
4462       auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
4463                           ? (MachineMemOperand::MODereferenceable |
4464                              MachineMemOperand::MOInvariant)
4465                           : MachineMemOperand::MONone;
4466 
4467       MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
4468       SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4469                                         /* Alignment = */ 8, MMOFlags);
4470 
4471       // Load environment pointer into r11.
4472       SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
4473       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4474       SDValue LoadEnvPtr =
4475           DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
4476                       /* Alignment = */ 8, MMOFlags);
4477 
4478       SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
4479       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4480       SDValue TOCPtr =
4481           DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
4482                       /* Alignment = */ 8, MMOFlags);
4483 
4484       setUsesTOCBasePtr(DAG);
4485       SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
4486                                         InFlag);
4487       Chain = TOCVal.getValue(0);
4488       InFlag = TOCVal.getValue(1);
4489 
4490       // If the function call has an explicit 'nest' parameter, it takes the
4491       // place of the environment pointer.
4492       if (!hasNest) {
4493         SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
4494                                           InFlag);
4495 
4496         Chain = EnvVal.getValue(0);
4497         InFlag = EnvVal.getValue(1);
4498       }
4499 
4500       MTCTROps[0] = Chain;
4501       MTCTROps[1] = LoadFuncPtr;
4502       MTCTROps[2] = InFlag;
4503     }
4504 
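    // Only pass the incoming glue operand to the MTCTR node if one exists.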
4505     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
4506                         makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4507     InFlag = Chain.getValue(1);
4508 
4509     NodeTys.clear();
4510     NodeTys.push_back(MVT::Other);
4511     NodeTys.push_back(MVT::Glue);
4512     Ops.push_back(Chain);
4513     CallOpc = PPCISD::BCTRL;
4514     Callee.setNode(nullptr);
4515     // Add use of X11 (holding environment pointer)
4516     if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
4517       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
4518     // Add CTR register as callee so a bctr can be emitted later.
4519     if (isTailCall)
4520       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4521   }
4522 
4523   // If this is a direct call, pass the chain and the callee.
4524   if (Callee.getNode()) {
4525     Ops.push_back(Chain);
4526     Ops.push_back(Callee);
4527   }
4528   // If this is a tail call add stack pointer delta.
4529   if (isTailCall)
4530     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
4531 
4532   // Add argument registers to the end of the list so that they are known live
4533   // into the call.
4534   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4535     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4536                                   RegsToPass[i].second.getValueType()));
4537 
4538   // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
4539   // into the call.
4540   if (isSVR4ABI && isPPC64 && !isPatchPoint) {
4541     setUsesTOCBasePtr(DAG);
4542     Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
4543   }
4544 
4545   return CallOpc;
4546 }
4547 
4548 static
4549 bool isLocalCall(const SDValue &Callee)
4550 {
4551   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4552     return G->getGlobal()->isStrongDefinitionForLinker();
4553   return false;
4554 }
4555 
4556 SDValue PPCTargetLowering::LowerCallResult(
4557     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4558     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4559     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4560 
4561   SmallVector<CCValAssign, 16> RVLocs;
4562   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4563                     *DAG.getContext());
4564   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4565 
4566   // Copy all of the result registers out of their specified physreg.
4567   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4568     CCValAssign &VA = RVLocs[i];
4569     assert(VA.isRegLoc() && "Can only return in registers!");
4570 
4571     SDValue Val = DAG.getCopyFromReg(Chain, dl,
4572                                      VA.getLocReg(), VA.getLocVT(), InFlag);
4573     Chain = Val.getValue(1);
4574     InFlag = Val.getValue(2);
4575 
4576     switch (VA.getLocInfo()) {
4577     default: llvm_unreachable("Unknown loc info!");
4578     case CCValAssign::Full: break;
4579     case CCValAssign::AExt:
4580       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4581       break;
4582     case CCValAssign::ZExt:
4583       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4584                         DAG.getValueType(VA.getValVT()));
4585       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4586       break;
4587     case CCValAssign::SExt:
4588       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4589                         DAG.getValueType(VA.getValVT()));
4590       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4591       break;
4592     }
4593 
4594     InVals.push_back(Val);
4595   }
4596 
4597   return Chain;
4598 }
4599 
4600 SDValue PPCTargetLowering::FinishCall(
4601     CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
4602     bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
4603     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
4604     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
4605     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
4606     SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
4607 
4608   std::vector<EVT> NodeTys;
4609   SmallVector<SDValue, 8> Ops;
4610   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4611                                  SPDiff, isTailCall, isPatchPoint, hasNest,
4612                                  RegsToPass, Ops, NodeTys, CS, Subtarget);
4613 
4614   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4615   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4616     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4617 
4618   // When performing tail call optimization the callee pops its arguments off
4619   // the stack. Account for this here so these bytes can be pushed back on in
4620   // PPCFrameLowering::eliminateCallFramePseudoInstr.
4621   int BytesCalleePops =
4622     (CallConv == CallingConv::Fast &&
4623      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4624 
4625   // Add a register mask operand representing the call-preserved registers.
4626   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4627   const uint32_t *Mask =
4628       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4629   assert(Mask && "Missing call preserved mask for calling convention");
4630   Ops.push_back(DAG.getRegisterMask(Mask));
4631 
4632   if (InFlag.getNode())
4633     Ops.push_back(InFlag);
4634 
4635   // Emit tail call.
4636   if (isTailCall) {
4637     assert(((Callee.getOpcode() == ISD::Register &&
4638              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4639             Callee.getOpcode() == ISD::TargetExternalSymbol ||
4640             Callee.getOpcode() == ISD::TargetGlobalAddress ||
4641             isa<ConstantSDNode>(Callee)) &&
4642     "Expecting an global address, external symbol, absolute value or register");
4643 
4644     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
4645     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4646   }
4647 
4648   // Add a NOP immediately after the branch instruction when using the 64-bit
4649   // SVR4 ABI. At link time, if caller and callee are in a different module and
4650   // thus have a different TOC, the call will be replaced with a call to a stub
4651   // function which saves the current TOC, loads the TOC of the callee and
4652   // branches to the callee. The NOP will be replaced with a load instruction
4653   // which restores the TOC of the caller from the TOC save slot of the current
4654   // stack frame. If caller and callee belong to the same module (and have the
4655   // same TOC), the NOP will remain unchanged.
4656 
4657   if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
4658       !isPatchPoint) {
4659     if (CallOpc == PPCISD::BCTRL) {
4660       // This is a call through a function pointer.
4661       // Restore the caller TOC from the save area into R2.
4662       // See PrepareCall() for more information about calls through function
4663       // pointers in the 64-bit SVR4 ABI.
4664       // We are using a target-specific load with r2 hard coded, because the
4665       // result of a target-independent load would never go directly into r2,
4666       // since r2 is a reserved register (which prevents the register allocator
4667       // from allocating it), resulting in an additional register being
4668       // allocated and an unnecessary move instruction being generated.
4669       CallOpc = PPCISD::BCTRL_LOAD_TOC;
4670 
4671       EVT PtrVT = getPointerTy(DAG.getDataLayout());
4672       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4673       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4674       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4675       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4676 
4677       // The address needs to go after the chain input but before the flag (or
4678       // any other variadic arguments).
4679       Ops.insert(std::next(Ops.begin()), AddTOC);
4680     } else if ((CallOpc == PPCISD::CALL) &&
4681                (!isLocalCall(Callee) ||
4682                 DAG.getTarget().getRelocationModel() == Reloc::PIC_))
4683       // Otherwise insert NOP for non-local calls.
4684       CallOpc = PPCISD::CALL_NOP;
4685   }
4686 
4687   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
4688   InFlag = Chain.getValue(1);
4689 
4690   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4691                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
4692                              InFlag, dl);
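  // The glue result is only needed when there are return values to copy out.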
4693   if (!Ins.empty())
4694     InFlag = Chain.getValue(1);
4695 
4696   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
4697                          Ins, dl, DAG, InVals);
4698 }
4699 
4700 SDValue
4701 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4702                              SmallVectorImpl<SDValue> &InVals) const {
4703   SelectionDAG &DAG                     = CLI.DAG;
4704   SDLoc &dl                             = CLI.DL;
4705   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4706   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
4707   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
4708   SDValue Chain                         = CLI.Chain;
4709   SDValue Callee                        = CLI.Callee;
4710   bool &isTailCall                      = CLI.IsTailCall;
4711   CallingConv::ID CallConv              = CLI.CallConv;
4712   bool isVarArg                         = CLI.IsVarArg;
4713   bool isPatchPoint                     = CLI.IsPatchPoint;
4714   ImmutableCallSite *CS                 = CLI.CS;
4715 
4716   if (isTailCall) {
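    // When long calls are in force, the call is made through a function
    // pointer; give up on the tail call unless the site is marked musttail.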
4717     if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall()))
4718       isTailCall = false;
4719     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
4720       isTailCall =
4721         IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
4722                                                  isVarArg, Outs, Ins, DAG);
4723     else
4724       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
4725                                                      Ins, DAG);
4726     if (isTailCall) {
4727       ++NumTailCalls;
4728       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4729         ++NumSiblingCalls;
4730 
4731       assert(isa<GlobalAddressSDNode>(Callee) &&
4732              "Callee should be an llvm::Function object.");
4733       DEBUG(
4734         const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
4735         const unsigned Width = 80 - strlen("TCO caller: ")
4736                                   - strlen(", callee linkage: 0, 0");
4737         dbgs() << "TCO caller: "
4738                << left_justify(DAG.getMachineFunction().getName(), Width)
4739                << ", callee linkage: "
4740                << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
4741       );
4742     }
4743   }
4744 
4745   if (!isTailCall && CS && CS->isMustTailCall())
4746     report_fatal_error("failed to perform tail call elimination on a call "
4747                        "site marked musttail");
4748 
4749   // When long calls (i.e. indirect calls) are always used, calls are always
4750   // made via function pointer. If we have a function name, first translate it
4751   // into a pointer.
4752   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
4753       !isTailCall)
4754     Callee = LowerGlobalAddress(Callee, DAG);
4755 
4756   if (Subtarget.isSVR4ABI()) {
4757     if (Subtarget.isPPC64())
4758       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
4759                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
4760                               dl, DAG, InVals, CS);
4761     else
4762       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
4763                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
4764                               dl, DAG, InVals, CS);
4765   }
4766 
4767   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
4768                           isTailCall, isPatchPoint, Outs, OutVals, Ins,
4769                           dl, DAG, InVals, CS);
4770 }
4771 
4772 SDValue PPCTargetLowering::LowerCall_32SVR4(
4773     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
4774     bool isTailCall, bool isPatchPoint,
4775     const SmallVectorImpl<ISD::OutputArg> &Outs,
4776     const SmallVectorImpl<SDValue> &OutVals,
4777     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4778     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
4779     ImmutableCallSite *CS) const {
4780   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
4781   // of the 32-bit SVR4 ABI stack frame layout.
4782 
4783   assert((CallConv == CallingConv::C ||
4784           CallConv == CallingConv::Fast) && "Unknown calling convention!");
4785 
4786   unsigned PtrByteSize = 4;
4787 
4788   MachineFunction &MF = DAG.getMachineFunction();
4789 
4790   // Mark this function as potentially containing a tail call. As a consequence
4791   // the frame pointer will be used for dynamic alloca and for restoring the
4792   // caller's stack pointer in this function's epilogue. This is done because a
4793   // tail-called function might overwrite the value in this function's (MF)
4794   // stack pointer stack slot 0(SP).
4795   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4796       CallConv == CallingConv::Fast)
4797     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4798 
4799   // Count how many bytes are to be pushed on the stack, including the linkage
4800   // area, parameter list area and the part of the local variable space which
4801   // contains copies of aggregates which are passed by value.
4802 
4803   // Assign locations to all of the outgoing arguments.
4804   SmallVector<CCValAssign, 16> ArgLocs;
4805   PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4806 
4807   // Reserve space for the linkage area on the stack.
4808   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
4809                        PtrByteSize);
4810   if (useSoftFloat())
4811     CCInfo.PreAnalyzeCallOperands(Outs);
4812 
4813   if (isVarArg) {
4814     // Handle fixed and variable vector arguments differently.
4815     // Fixed vector arguments go into registers as long as registers are
4816     // available. Variable vector arguments always go into memory.
4817     unsigned NumArgs = Outs.size();
4818 
4819     for (unsigned i = 0; i != NumArgs; ++i) {
4820       MVT ArgVT = Outs[i].VT;
4821       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4822       bool Result;
4823 
4824       if (Outs[i].IsFixed) {
4825         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
4826                                CCInfo);
4827       } else {
4828         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
4829                                       ArgFlags, CCInfo);
4830       }
4831 
4832       if (Result) {
4833 #ifndef NDEBUG
4834         errs() << "Call operand #" << i << " has unhandled type "
4835              << EVT(ArgVT).getEVTString() << "\n";
4836 #endif
4837         llvm_unreachable(nullptr);
4838       }
4839     }
4840   } else {
4841     // All arguments are treated the same.
4842     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
4843   }
4844   CCInfo.clearWasPPCF128();
4845 
4846   // Assign locations to all of the outgoing aggregate by value arguments.
4847   SmallVector<CCValAssign, 16> ByValArgLocs;
4848   CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
4849 
4850   // Reserve stack space for the allocations in CCInfo.
4851   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
4852 
4853   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
4854 
4855   // Size of the linkage area, parameter list area and the part of the local
4856   // variable space where copies of aggregates which are passed by value are
4857   // stored.
4858   unsigned NumBytes = CCByValInfo.getNextStackOffset();
4859 
4860   // Calculate by how many bytes the stack has to be adjusted in case of tail
4861   // call optimization.
4862   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4863 
4864   // Adjust the stack pointer for the new arguments...
4865   // These operations are automatically eliminated by the prolog/epilog pass
4866   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4867                                dl);
4868   SDValue CallSeqStart = Chain;
4869 
4870   // Load the return address and frame pointer so they can be moved somewhere
4871   // else later.
4872   SDValue LROp, FPOp;
4873   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
4874 
4875   // Set up a copy of the stack pointer for use loading and storing any
4876   // arguments that may not fit in the registers available for argument
4877   // passing.
4878   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4879 
4880   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4881   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4882   SmallVector<SDValue, 8> MemOpChains;
4883 
4884   bool seenFloatArg = false;
4885   // Walk the register/memloc assignments, inserting copies/loads.
4886   for (unsigned i = 0, j = 0, e = ArgLocs.size();
4887        i != e;
4888        ++i) {
4889     CCValAssign &VA = ArgLocs[i];
4890     SDValue Arg = OutVals[i];
4891     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4892 
4893     if (Flags.isByVal()) {
4894       // Argument is an aggregate which is passed by value, thus we need to
4895       // create a copy of it in the local variable space of the current stack
4896       // frame (which is the stack frame of the caller) and pass the address of
4897       // this copy to the callee.
4898       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
4899       CCValAssign &ByValVA = ByValArgLocs[j++];
4900       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
4901 
4902       // Memory reserved in the local variable space of the caller's stack frame.
4903       unsigned LocMemOffset = ByValVA.getLocMemOffset();
4904 
4905       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4906       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4907                            StackPtr, PtrOff);
4908 
4909       // Create a copy of the argument in the local area of the current
4910       // stack frame.
4911       SDValue MemcpyCall =
4912         CreateCopyOfByValArgument(Arg, PtrOff,
4913                                   CallSeqStart.getNode()->getOperand(0),
4914                                   Flags, DAG, dl);
4915 
4916       // This must go outside the CALLSEQ_START..END.
4917       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4918                            CallSeqStart.getNode()->getOperand(1),
4919                            SDLoc(MemcpyCall));
4920       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4921                              NewCallSeqStart.getNode());
4922       Chain = CallSeqStart = NewCallSeqStart;
4923 
4924       // Pass the address of the aggregate copy on the stack either in a
4925       // physical register or in the parameter list area of the current stack
4926       // frame to the callee.
4927       Arg = PtrOff;
4928     }
4929 
4930     if (VA.isRegLoc()) {
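      // i1 arguments are zero-extended to i32 before being placed in a GPR.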
4931       if (Arg.getValueType() == MVT::i1)
4932         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
4933 
4934       seenFloatArg |= VA.getLocVT().isFloatingPoint();
4935       // Put argument in a physical register.
4936       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4937     } else {
4938       // Put argument in the parameter list area of the current stack frame.
4939       assert(VA.isMemLoc());
4940       unsigned LocMemOffset = VA.getLocMemOffset();
4941 
4942       if (!isTailCall) {
4943         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4944         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4945                              StackPtr, PtrOff);
4946 
4947         MemOpChains.push_back(
4948             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4949       } else {
4950         // Calculate and remember argument location.
4951         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
4952                                  TailCallArguments);
4953       }
4954     }
4955   }
4956 
4957   if (!MemOpChains.empty())
4958     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4959 
4960   // Build a sequence of copy-to-reg nodes chained together with token chain
4961   // and flag operands which copy the outgoing args into the appropriate regs.
4962   SDValue InFlag;
4963   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4964     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4965                              RegsToPass[i].second, InFlag);
4966     InFlag = Chain.getValue(1);
4967   }
4968 
4969   // Set CR bit 6 to true if this is a vararg call with floating args passed in
4970   // registers.
4971   if (isVarArg) {
4972     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
4973     SDValue Ops[] = { Chain, InFlag };
4974 
4975     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
4976                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
4977 
4978     InFlag = Chain.getValue(1);
4979   }
4980 
4981   if (isTailCall)
4982     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
4983                     TailCallArguments);
4984 
4985   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
4986                     /* unused except on PPC64 ELFv1 */ false, DAG,
4987                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
4988                     NumBytes, Ins, InVals, CS);
4989 }
4990 
4991 // Copy an argument into memory, being careful to do this outside the
4992 // call sequence for the call to which the argument belongs.
4993 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
4994     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
4995     SelectionDAG &DAG, const SDLoc &dl) const {
4996   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
4997                         CallSeqStart.getNode()->getOperand(0),
4998                         Flags, DAG, dl);
4999   // The MEMCPY must go outside the CALLSEQ_START..END.
5000   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
5001                              CallSeqStart.getNode()->getOperand(1),
5002                              SDLoc(MemcpyCall));
5003   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5004                          NewCallSeqStart.getNode());
5005   return NewCallSeqStart;
5006 }
5007 
5008 SDValue PPCTargetLowering::LowerCall_64SVR4(
5009     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5010     bool isTailCall, bool isPatchPoint,
5011     const SmallVectorImpl<ISD::OutputArg> &Outs,
5012     const SmallVectorImpl<SDValue> &OutVals,
5013     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5014     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5015     ImmutableCallSite *CS) const {
5016 
5017   bool isELFv2ABI = Subtarget.isELFv2ABI();
5018   bool isLittleEndian = Subtarget.isLittleEndian();
5019   unsigned NumOps = Outs.size();
5020   bool hasNest = false;
5021   bool IsSibCall = false;
5022 
5023   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5024   unsigned PtrByteSize = 8;
5025 
5026   MachineFunction &MF = DAG.getMachineFunction();
5027 
5028   if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5029     IsSibCall = true;
5030 
5031   // Mark this function as potentially containing a function that contains a
5032   // tail call. As a consequence, the frame pointer will be used for dynamic
5033   // allocation and for restoring the caller's stack pointer in this function's
5034   // epilog. This is done because the tail-called function might overwrite the
5035   // value in this function's (MF) stack pointer stack slot 0(SP).
5036   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5037       CallConv == CallingConv::Fast)
5038     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5039 
5040   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5041          "fastcc not supported on varargs functions");
5042 
5043   // Count how many bytes are to be pushed on the stack, including the linkage
5044   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5045   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5046   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5047   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5048   unsigned NumBytes = LinkageSize;
5049   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5050   unsigned &QFPR_idx = FPR_idx; // QPX and FP arguments share one counter.
5051 
5052   static const MCPhysReg GPR[] = {
5053     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5054     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5055   };
5056   static const MCPhysReg VR[] = {
5057     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5058     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5059   };
5060 
5061   const unsigned NumGPRs = array_lengthof(GPR);
5062   const unsigned NumFPRs = 13;
5063   const unsigned NumVRs  = array_lengthof(VR);
5064   const unsigned NumQFPRs = NumFPRs;
5065 
5066   // When using the fast calling convention, we don't provide backing for
5067   // arguments that will be in registers.
5068   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5069 
5070   // Add up all the space actually used.
5071   for (unsigned i = 0; i != NumOps; ++i) {
5072     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5073     EVT ArgVT = Outs[i].VT;
5074     EVT OrigVT = Outs[i].ArgVT;
5075 
5076     if (Flags.isNest())
5077       continue;
5078 
5079     if (CallConv == CallingConv::Fast) {
5080       if (Flags.isByVal())
5081         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5082       else
5083         switch (ArgVT.getSimpleVT().SimpleTy) {
5084         default: llvm_unreachable("Unexpected ValueType for argument!");
5085         case MVT::i1:
5086         case MVT::i32:
5087         case MVT::i64:
5088           if (++NumGPRsUsed <= NumGPRs)
5089             continue;
5090           break;
5091         case MVT::v4i32:
5092         case MVT::v8i16:
5093         case MVT::v16i8:
5094         case MVT::v2f64:
5095         case MVT::v2i64:
5096         case MVT::v1i128:
5097           if (++NumVRsUsed <= NumVRs)
5098             continue;
5099           break;
5100         case MVT::v4f32:
5101           // When using QPX, this is handled like an FP register; otherwise,
5102           // it is an Altivec register.
5103           if (Subtarget.hasQPX()) {
5104             if (++NumFPRsUsed <= NumFPRs)
5105               continue;
5106           } else {
5107             if (++NumVRsUsed <= NumVRs)
5108               continue;
5109           }
5110           break;
5111         case MVT::f32:
5112         case MVT::f64:
5113         case MVT::v4f64: // QPX
5114         case MVT::v4i1:  // QPX
5115           if (++NumFPRsUsed <= NumFPRs)
5116             continue;
5117           break;
5118         }
5119     }
5120 
5121     /* Respect alignment of argument on the stack.  */
5122     unsigned Align =
5123       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5124     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
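    // For example, a 16-byte-aligned vector argument arriving when NumBytes is
    // 52 bumps NumBytes up to 64 here before its own size is added below.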
5125 
5126     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5127     if (Flags.isInConsecutiveRegsLast())
5128       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5129   }
5130 
5131   unsigned NumBytesActuallyUsed = NumBytes;
5132 
5133   // The prolog code of the callee may store up to 8 GPR argument registers to
5134   // the stack, allowing va_start to index over them in memory if it is varargs.
5135   // Because we cannot tell if this is needed on the caller side, we have to
5136   // conservatively assume that it is needed.  As such, make sure we have at
5137   // least enough stack space for the caller to store the 8 GPRs.
5138   // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
5139   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
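  // For example, on ELFv1 (48-byte linkage area) even a call passing a single
  // i64 entirely in a register still reserves 48 + 8*8 = 112 bytes here.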
5140 
5141   // Tail call needs the stack to be aligned.
5142   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5143       CallConv == CallingConv::Fast)
5144     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5145 
5146   int SPDiff = 0;
5147 
5148   // Calculate by how many bytes the stack has to be adjusted in case of tail
5149   // call optimization.
5150   if (!IsSibCall)
5151     SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5152 
5153   // To protect arguments on the stack from being clobbered in a tail call,
5154   // force all the loads to happen before doing any other lowering.
5155   if (isTailCall)
5156     Chain = DAG.getStackArgumentTokenFactor(Chain);
5157 
5158   // Adjust the stack pointer for the new arguments...
5159   // These operations are automatically eliminated by the prolog/epilog pass
5160   if (!IsSibCall)
5161     Chain = DAG.getCALLSEQ_START(Chain,
5162                                  DAG.getIntPtrConstant(NumBytes, dl, true), dl);
5163   SDValue CallSeqStart = Chain;
5164 
5165   // Load the return address and frame pointer so they can be moved somewhere
5166   // else later.
5167   SDValue LROp, FPOp;
5168   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5169 
5170   // Set up a copy of the stack pointer for use loading and storing any
5171   // arguments that may not fit in the registers available for argument
5172   // passing.
5173   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5174 
5175   // Figure out which arguments are going to go in registers, and which in
5176   // memory.  Also, if this is a vararg function, floating point operations
5177   // must be stored to our stack, and loaded into integer regs as well, if
5178   // any integer regs are available for argument passing.
5179   unsigned ArgOffset = LinkageSize;
5180 
5181   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5182   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5183 
5184   SmallVector<SDValue, 8> MemOpChains;
5185   for (unsigned i = 0; i != NumOps; ++i) {
5186     SDValue Arg = OutVals[i];
5187     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5188     EVT ArgVT = Outs[i].VT;
5189     EVT OrigVT = Outs[i].ArgVT;
5190 
5191     // PtrOff will be used to store the current argument to the stack if a
5192     // register cannot be found for it.
5193     SDValue PtrOff;
5194 
5195     // We re-align the argument offset for each argument, except under the
5196     // fast calling convention, where we do the re-alignment only when the
5197     // argument will actually use a stack slot.
5198     auto ComputePtrOff = [&]() {
5199       /* Respect alignment of argument on the stack.  */
5200       unsigned Align =
5201         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5202       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5203 
5204       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5205 
5206       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5207     };
5208 
5209     if (CallConv != CallingConv::Fast) {
5210       ComputePtrOff();
5211 
5212       /* Compute GPR index associated with argument offset.  */
5213       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5214       GPR_idx = std::min(GPR_idx, NumGPRs);
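      // For illustration: an argument placed at LinkageSize + 24 maps to
      // GPR_idx 3, i.e. it would be passed in X6 if a GPR is still available.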
5215     }
5216 
5217     // Promote integers to 64-bit values.
5218     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5219       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5220       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5221       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5222     }
5223 
5224     // FIXME memcpy is used way more than necessary.  Correctness first.
5225     // Note: "by value" is code for passing a structure by value, not
5226     // basic types.
5227     if (Flags.isByVal()) {
5228       // Note: Size includes alignment padding, so
5229       //   struct x { short a; char b; }
5230       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5231       // These are the proper values we need for right-justifying the
5232       // aggregate in a parameter register.
5233       unsigned Size = Flags.getByValSize();
5234 
5235       // An empty aggregate parameter takes up no storage and no
5236       // registers.
5237       if (Size == 0)
5238         continue;
5239 
5240       if (CallConv == CallingConv::Fast)
5241         ComputePtrOff();
5242 
5243       // All aggregates smaller than 8 bytes must be passed right-justified.
5244       if (Size==1 || Size==2 || Size==4) {
5245         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5246         if (GPR_idx != NumGPRs) {
5247           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5248                                         MachinePointerInfo(), VT);
5249           MemOpChains.push_back(Load.getValue(1));
5250           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5251 
5252           ArgOffset += PtrByteSize;
5253           continue;
5254         }
5255       }
5256 
5257       if (GPR_idx == NumGPRs && Size < 8) {
5258         SDValue AddPtr = PtrOff;
5259         if (!isLittleEndian) {
5260           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5261                                           PtrOff.getValueType());
5262           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5263         }
5264         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5265                                                           CallSeqStart,
5266                                                           Flags, DAG, dl);
5267         ArgOffset += PtrByteSize;
5268         continue;
5269       }
5270       // Copy entire object into memory.  There are cases where gcc-generated
5271       // code assumes it is there, even if it could be put entirely into
5272       // registers.  (This is not what the doc says.)
5273 
5274       // FIXME: The above statement is likely due to a misunderstanding of the
5275       // documents.  All arguments must be copied into the parameter area BY
5276       // THE CALLEE in the event that the callee takes the address of any
5277       // formal argument.  That has not yet been implemented.  However, it is
5278       // reasonable to use the stack area as a staging area for the register
5279       // load.
5280 
5281       // Skip this for small aggregates, as we will use the same slot for a
5282       // right-justified copy, below.
5283       if (Size >= 8)
5284         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5285                                                           CallSeqStart,
5286                                                           Flags, DAG, dl);
5287 
5288       // When a register is available, pass a small aggregate right-justified.
5289       if (Size < 8 && GPR_idx != NumGPRs) {
5290         // The easiest way to get this right-justified in a register
5291         // is to copy the structure into the rightmost portion of a
5292         // local variable slot, then load the whole slot into the
5293         // register.
5294         // FIXME: The memcpy seems to produce pretty awful code for
5295         // small aggregates, particularly for packed ones.
5296         // FIXME: It would be preferable to use the slot in the
5297         // parameter save area instead of a new local variable.
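        // For illustration: on big-endian targets a 3-byte aggregate is copied
        // to bytes 5..7 of the doubleword slot (AddPtr = PtrOff + 5), so the
        // full-width load below leaves it right-justified in the GPR.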
5298         SDValue AddPtr = PtrOff;
5299         if (!isLittleEndian) {
5300           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5301           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5302         }
5303         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5304                                                           CallSeqStart,
5305                                                           Flags, DAG, dl);
5306 
5307         // Load the slot into the register.
5308         SDValue Load =
5309             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5310         MemOpChains.push_back(Load.getValue(1));
5311         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5312 
5313         // Done with this argument.
5314         ArgOffset += PtrByteSize;
5315         continue;
5316       }
5317 
5318       // For aggregates larger than PtrByteSize, copy the pieces of the
5319       // object that fit into registers from the parameter save area.
5320       for (unsigned j=0; j<Size; j+=PtrByteSize) {
5321         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5322         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5323         if (GPR_idx != NumGPRs) {
5324           SDValue Load =
5325               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5326           MemOpChains.push_back(Load.getValue(1));
5327           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5328           ArgOffset += PtrByteSize;
5329         } else {
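          // E.g. with Size = 20 and j = 8 when the GPRs run out, the remaining
          // 12 bytes round up to 16, so ArgOffset advances by 16 below.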
5330           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5331           break;
5332         }
5333       }
5334       continue;
5335     }
5336 
5337     switch (Arg.getSimpleValueType().SimpleTy) {
5338     default: llvm_unreachable("Unexpected ValueType for argument!");
5339     case MVT::i1:
5340     case MVT::i32:
5341     case MVT::i64:
5342       if (Flags.isNest()) {
5343         // The 'nest' parameter, if any, is passed in R11.
5344         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5345         hasNest = true;
5346         break;
5347       }
5348 
5349       // These can be scalar arguments or elements of an integer array type
5350       // passed directly.  Clang may use those instead of "byval" aggregate
5351       // types to avoid forcing arguments to memory unnecessarily.
5352       if (GPR_idx != NumGPRs) {
5353         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5354       } else {
5355         if (CallConv == CallingConv::Fast)
5356           ComputePtrOff();
5357 
5358         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5359                          true, isTailCall, false, MemOpChains,
5360                          TailCallArguments, dl);
5361         if (CallConv == CallingConv::Fast)
5362           ArgOffset += PtrByteSize;
5363       }
5364       if (CallConv != CallingConv::Fast)
5365         ArgOffset += PtrByteSize;
5366       break;
5367     case MVT::f32:
5368     case MVT::f64: {
5369       // These can be scalar arguments or elements of a float array type
5370       // passed directly.  The latter are used to implement ELFv2 homogeneous
5371       // float aggregates.
5372 
5373       // Named arguments go into FPRs first, and once they overflow, the
5374       // remaining arguments go into GPRs and then the parameter save area.
5375       // Unnamed arguments for vararg functions always go to GPRs and
5376       // then the parameter save area.  For now, put all arguments to vararg
5377       // routines always in both locations (FPR *and* GPR or stack slot).
5378       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5379       bool NeededLoad = false;
5380 
5381       // First load the argument into the next available FPR.
5382       if (FPR_idx != NumFPRs)
5383         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5384 
5385       // Next, load the argument into GPR or stack slot if needed.
5386       if (!NeedGPROrStack)
5387         ;
5388         ; // Already passed in an FPR; nothing more to do here.
5389         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5390         // once we support fp <-> gpr moves.
5391 
5392         // In the non-vararg case, this can only ever happen in the
5393         // presence of f32 array types, since otherwise we never run
5394         // out of FPRs before running out of GPRs.
5395         SDValue ArgVal;
5396 
5397         // Double values are always passed in a single GPR.
5398         if (Arg.getValueType() != MVT::f32) {
5399           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5400 
5401         // Non-array float values are extended and passed in a GPR.
5402         } else if (!Flags.isInConsecutiveRegs()) {
5403           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5404           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5405 
5406         // If we have an array of floats, we collect every odd element
5407         // together with its predecessor into one GPR.
5408         } else if (ArgOffset % PtrByteSize != 0) {
5409           SDValue Lo, Hi;
5410           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5411           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5412           if (!isLittleEndian)
5413             std::swap(Lo, Hi);
5414           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5415 
5416         // The final element, if even, goes into the first half of a GPR.
5417         } else if (Flags.isInConsecutiveRegsLast()) {
5418           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5419           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5420           if (!isLittleEndian)
5421             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5422                                  DAG.getConstant(32, dl, MVT::i32));
5423 
5424         // Non-final even elements are skipped; they will be handled
5425         // together with the subsequent argument on the next go-around.
5426         } else
5427           ArgVal = SDValue();
5428 
5429         if (ArgVal.getNode())
5430           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5431       } else {
5432         if (CallConv == CallingConv::Fast)
5433           ComputePtrOff();
5434 
5435         // Single-precision floating-point values are mapped to the
5436         // second (rightmost) word of the stack doubleword.
5437         if (Arg.getValueType() == MVT::f32 &&
5438             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5439           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5440           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5441         }
5442 
5443         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5444                          true, isTailCall, false, MemOpChains,
5445                          TailCallArguments, dl);
5446 
5447         NeededLoad = true;
5448       }
5449       // When passing an array of floats, the array occupies consecutive
5450       // space in the argument area; only round up to the next doubleword
5451       // at the end of the array.  Otherwise, each float takes 8 bytes.
5452       if (CallConv != CallingConv::Fast || NeededLoad) {
5453         ArgOffset += (Arg.getValueType() == MVT::f32 &&
5454                       Flags.isInConsecutiveRegs()) ? 4 : 8;
5455         if (Flags.isInConsecutiveRegsLast())
5456           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5457       }
5458       break;
5459     }
5460     case MVT::v4f32:
5461     case MVT::v4i32:
5462     case MVT::v8i16:
5463     case MVT::v16i8:
5464     case MVT::v2f64:
5465     case MVT::v2i64:
5466     case MVT::v1i128:
5467       if (!Subtarget.hasQPX()) {
5468       // These can be scalar arguments or elements of a vector array type
5469       // passed directly.  The latter are used to implement ELFv2 homogeneous
5470       // vector aggregates.
5471 
5472       // For a varargs call, named arguments go into VRs or on the stack as
5473       // usual; unnamed arguments always go to the stack or the corresponding
5474       // GPRs when within range.  For now, we always put the value in both
5475       // locations (or even all three).
5476       if (isVarArg) {
5477         // We could elide this store in the case where the object fits
5478         // entirely in R registers.  Maybe later.
5479         SDValue Store =
5480             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5481         MemOpChains.push_back(Store);
5482         if (VR_idx != NumVRs) {
5483           SDValue Load =
5484               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5485           MemOpChains.push_back(Load.getValue(1));
5486           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5487         }
5488         ArgOffset += 16;
5489         for (unsigned i=0; i<16; i+=PtrByteSize) {
5490           if (GPR_idx == NumGPRs)
5491             break;
5492           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5493                                    DAG.getConstant(i, dl, PtrVT));
5494           SDValue Load =
5495               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5496           MemOpChains.push_back(Load.getValue(1));
5497           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5498         }
5499         break;
5500       }
5501 
5502       // Non-varargs Altivec params go into VRs or on the stack.
5503       if (VR_idx != NumVRs) {
5504         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5505       } else {
5506         if (CallConv == CallingConv::Fast)
5507           ComputePtrOff();
5508 
5509         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5510                          true, isTailCall, true, MemOpChains,
5511                          TailCallArguments, dl);
5512         if (CallConv == CallingConv::Fast)
5513           ArgOffset += 16;
5514       }
5515 
5516       if (CallConv != CallingConv::Fast)
5517         ArgOffset += 16;
5518       break;
5519       } // not QPX
5520 
5521       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5522              "Invalid QPX parameter type");
5523 
5524       /* fall through */
5525     case MVT::v4f64:
5526     case MVT::v4i1: {
5527       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5528       if (isVarArg) {
5529         // We could elide this store in the case where the object fits
5530         // entirely in R registers.  Maybe later.
5531         SDValue Store =
5532             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5533         MemOpChains.push_back(Store);
5534         if (QFPR_idx != NumQFPRs) {
5535           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
5536                                      PtrOff, MachinePointerInfo());
5537           MemOpChains.push_back(Load.getValue(1));
5538           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5539         }
5540         ArgOffset += (IsF32 ? 16 : 32);
5541         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5542           if (GPR_idx == NumGPRs)
5543             break;
5544           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5545                                    DAG.getConstant(i, dl, PtrVT));
5546           SDValue Load =
5547               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5548           MemOpChains.push_back(Load.getValue(1));
5549           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5550         }
5551         break;
5552       }
5553 
5554       // Non-varargs QPX params go into registers or on the stack.
5555       if (QFPR_idx != NumQFPRs) {
5556         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5557       } else {
5558         if (CallConv == CallingConv::Fast)
5559           ComputePtrOff();
5560 
5561         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5562                          true, isTailCall, true, MemOpChains,
5563                          TailCallArguments, dl);
5564         if (CallConv == CallingConv::Fast)
5565           ArgOffset += (IsF32 ? 16 : 32);
5566       }
5567 
5568       if (CallConv != CallingConv::Fast)
5569         ArgOffset += (IsF32 ? 16 : 32);
5570       break;
5571       }
5572     }
5573   }
5574 
5575   assert(NumBytesActuallyUsed == ArgOffset);
5576   (void)NumBytesActuallyUsed;
5577 
5578   if (!MemOpChains.empty())
5579     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5580 
5581   // Check if this is an indirect call (MTCTR/BCTRL).
5582   // See PrepareCall() for more information about calls through function
5583   // pointers in the 64-bit SVR4 ABI.
5584   if (!isTailCall && !isPatchPoint &&
5585       !isFunctionGlobalAddress(Callee) &&
5586       !isa<ExternalSymbolSDNode>(Callee)) {
5587     // Load r2 into a virtual register and store it to the TOC save area.
5588     setUsesTOCBasePtr(DAG);
5589     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5590     // TOC save area offset.
5591     unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5592     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5593     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5594     Chain = DAG.getStore(
5595         Val.getValue(1), dl, Val, AddPtr,
5596         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
5597     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5598     // This does not mean the MTCTR instruction must use R12; it's easier
5599     // to model this as an extra parameter, so do that.
5600     if (isELFv2ABI && !isPatchPoint)
5601       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5602   }
5603 
5604   // Build a sequence of copy-to-reg nodes chained together with token chain
5605   // and flag operands which copy the outgoing args into the appropriate regs.
5606   SDValue InFlag;
5607   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5608     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5609                              RegsToPass[i].second, InFlag);
5610     InFlag = Chain.getValue(1);
5611   }
5612 
5613   if (isTailCall && !IsSibCall)
5614     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5615                     TailCallArguments);
5616 
5617   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
5618                     DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
5619                     SPDiff, NumBytes, Ins, InVals, CS);
5620 }
5621 
5622 SDValue PPCTargetLowering::LowerCall_Darwin(
5623     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5624     bool isTailCall, bool isPatchPoint,
5625     const SmallVectorImpl<ISD::OutputArg> &Outs,
5626     const SmallVectorImpl<SDValue> &OutVals,
5627     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5628     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5629     ImmutableCallSite *CS) const {
5630 
5631   unsigned NumOps = Outs.size();
5632 
5633   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5634   bool isPPC64 = PtrVT == MVT::i64;
5635   unsigned PtrByteSize = isPPC64 ? 8 : 4;
5636 
5637   MachineFunction &MF = DAG.getMachineFunction();
5638 
5639   // Mark this function as potentially containing a function that contains a
5640   // tail call. As a consequence, the frame pointer will be used for dynamic
5641   // allocation and for restoring the caller's stack pointer in this function's
5642   // epilog. This is done because the tail-called function might overwrite the
5643   // value in this function's (MF) stack pointer stack slot 0(SP).
5644   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5645       CallConv == CallingConv::Fast)
5646     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5647 
5648   // Count how many bytes are to be pushed on the stack, including the linkage
5649   // area, and parameter passing area.  We start with 24/48 bytes, which is
5650   // prereserved space for [SP][CR][LR][3 x unused].
5651   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5652   unsigned NumBytes = LinkageSize;
5653 
5654   // Add up all the space actually used.
5655   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
5656   // they all go in registers, but we must reserve stack space for them for
5657   // possible use by the caller.  In varargs or 64-bit calls, parameters are
5658   // assigned stack space in order, with padding so Altivec parameters are
5659   // 16-byte aligned.
5660   unsigned nAltivecParamsAtEnd = 0;
5661   for (unsigned i = 0; i != NumOps; ++i) {
5662     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5663     EVT ArgVT = Outs[i].VT;
5664     // Varargs Altivec parameters are padded to a 16 byte boundary.
5665     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
5666         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
5667         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
5668       if (!isVarArg && !isPPC64) {
5669         // Non-varargs Altivec parameters go after all the non-Altivec
5670         // parameters; handle those later so we know how much padding we need.
5671         nAltivecParamsAtEnd++;
5672         continue;
5673       }
5674       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
5675       NumBytes = ((NumBytes+15)/16)*16;
5676     }
5677     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5678   }
5679 
5680   // Allow for Altivec parameters at the end, if needed.
5681   if (nAltivecParamsAtEnd) {
5682     NumBytes = ((NumBytes+15)/16)*16;
5683     NumBytes += 16*nAltivecParamsAtEnd;
5684   }
5685 
5686   // The prolog code of the callee may store up to 8 GPR argument registers to
5687   // the stack, allowing va_start to index over them in memory if it is varargs.
5688   // Because we cannot tell if this is needed on the caller side, we have to
5689   // conservatively assume that it is needed.  As such, make sure we have at
5690   // least enough stack space for the caller to store the 8 GPRs.
5691   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
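  // For example, on 32-bit Darwin (24-byte linkage area) this reserves at
  // least 24 + 8*4 = 56 bytes even for calls with few or no arguments.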
5692 
5693   // Tail call needs the stack to be aligned.
5694   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5695       CallConv == CallingConv::Fast)
5696     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5697 
5698   // Calculate by how many bytes the stack has to be adjusted in case of tail
5699   // call optimization.
5700   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5701 
5702   // To protect arguments on the stack from being clobbered in a tail call,
5703   // force all the loads to happen before doing any other lowering.
5704   if (isTailCall)
5705     Chain = DAG.getStackArgumentTokenFactor(Chain);
5706 
5707   // Adjust the stack pointer for the new arguments...
5708   // These operations are automatically eliminated by the prolog/epilog pass
5709   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5710                                dl);
5711   SDValue CallSeqStart = Chain;
5712 
5713   // Load the return address and frame pointer so they can be moved somewhere
5714   // else later.
5715   SDValue LROp, FPOp;
5716   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5717 
5718   // Set up a copy of the stack pointer for use loading and storing any
5719   // arguments that may not fit in the registers available for argument
5720   // passing.
5721   SDValue StackPtr;
5722   if (isPPC64)
5723     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5724   else
5725     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5726 
5727   // Figure out which arguments are going to go in registers, and which in
5728   // memory.  Also, if this is a vararg function, floating point operations
5729   // must be stored to our stack, and loaded into integer regs as well, if
5730   // any integer regs are available for argument passing.
5731   unsigned ArgOffset = LinkageSize;
5732   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5733 
5734   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
5735     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
5736     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
5737   };
5738   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
5739     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5740     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5741   };
5742   static const MCPhysReg VR[] = {
5743     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5744     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5745   };
5746   const unsigned NumGPRs = array_lengthof(GPR_32);
5747   const unsigned NumFPRs = 13;
5748   const unsigned NumVRs  = array_lengthof(VR);
5749 
5750   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
5751 
5752   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5753   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5754 
5755   SmallVector<SDValue, 8> MemOpChains;
5756   for (unsigned i = 0; i != NumOps; ++i) {
5757     SDValue Arg = OutVals[i];
5758     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5759 
5760     // PtrOff will be used to store the current argument to the stack if a
5761     // register cannot be found for it.
5762     SDValue PtrOff;
5763 
5764     PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5765 
5766     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5767 
5768     // On PPC64, promote integers to 64-bit values.
5769     if (isPPC64 && Arg.getValueType() == MVT::i32) {
5770       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5771       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5772       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5773     }
5774 
5775     // FIXME memcpy is used way more than necessary.  Correctness first.
5776     // Note: "by value" is code for passing a structure by value, not
5777     // basic types.
5778     if (Flags.isByVal()) {
5779       unsigned Size = Flags.getByValSize();
5780       // Very small objects are passed right-justified.  Everything else is
5781       // passed left-justified.
5782       if (Size==1 || Size==2) {
5783         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
5784         if (GPR_idx != NumGPRs) {
5785           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5786                                         MachinePointerInfo(), VT);
5787           MemOpChains.push_back(Load.getValue(1));
5788           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5789 
5790           ArgOffset += PtrByteSize;
5791         } else {
5792           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5793                                           PtrOff.getValueType());
5794           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5795           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5796                                                             CallSeqStart,
5797                                                             Flags, DAG, dl);
5798           ArgOffset += PtrByteSize;
5799         }
5800         continue;
5801       }
5802       // Copy entire object into memory.  There are cases where gcc-generated
5803       // code assumes it is there, even if it could be put entirely into
5804       // registers.  (This is not what the doc says.)
5805       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5806                                                         CallSeqStart,
5807                                                         Flags, DAG, dl);
5808 
5809       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
5810       // copy the pieces of the object that fit into registers from the
5811       // parameter save area.
5812       for (unsigned j=0; j<Size; j+=PtrByteSize) {
5813         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5814         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5815         if (GPR_idx != NumGPRs) {
5816           SDValue Load =
5817               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5818           MemOpChains.push_back(Load.getValue(1));
5819           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5820           ArgOffset += PtrByteSize;
5821         } else {
5822           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5823           break;
5824         }
5825       }
5826       continue;
5827     }
5828 
5829     switch (Arg.getSimpleValueType().SimpleTy) {
5830     default: llvm_unreachable("Unexpected ValueType for argument!");
5831     case MVT::i1:
5832     case MVT::i32:
5833     case MVT::i64:
5834       if (GPR_idx != NumGPRs) {
5835         if (Arg.getValueType() == MVT::i1)
5836           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
5837 
5838         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5839       } else {
5840         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5841                          isPPC64, isTailCall, false, MemOpChains,
5842                          TailCallArguments, dl);
5843       }
5844       ArgOffset += PtrByteSize;
5845       break;
5846     case MVT::f32:
5847     case MVT::f64:
5848       if (FPR_idx != NumFPRs) {
5849         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5850 
5851         if (isVarArg) {
5852           SDValue Store =
5853               DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5854           MemOpChains.push_back(Store);
5855 
5856           // Float varargs are always shadowed in available integer registers
5857           if (GPR_idx != NumGPRs) {
5858             SDValue Load =
5859                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
5860             MemOpChains.push_back(Load.getValue(1));
5861             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5862           }
5863           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
5864             SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5865             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5866             SDValue Load =
5867                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
5868             MemOpChains.push_back(Load.getValue(1));
5869             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5870           }
5871         } else {
5872           // If we have any FPRs remaining, we may also have GPRs remaining.
5873           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
5874           // GPRs.
5875           if (GPR_idx != NumGPRs)
5876             ++GPR_idx;
5877           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
5878               !isPPC64)  // PPC64 has 64-bit GPRs, obviously :)
5879             ++GPR_idx;
5880         }
5881       } else
5882         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5883                          isPPC64, isTailCall, false, MemOpChains,
5884                          TailCallArguments, dl);
5885       if (isPPC64)
5886         ArgOffset += 8;
5887       else
5888         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
5889       break;
5890     case MVT::v4f32:
5891     case MVT::v4i32:
5892     case MVT::v8i16:
5893     case MVT::v16i8:
5894       if (isVarArg) {
5895         // These go aligned on the stack, or in the corresponding R registers
5896         // when within range.  The Darwin PPC ABI doc claims they also go in
5897         // V registers; in fact gcc does this only for arguments that are
5898         // prototyped, not for those that match the ...  We do it for all
5899         // arguments; this seems to work.
5900         while (ArgOffset % 16 !=0) {
5901           ArgOffset += PtrByteSize;
5902           if (GPR_idx != NumGPRs)
5903             GPR_idx++;
5904         }
5905         // We could elide this store in the case where the object fits
5906         // entirely in R registers.  Maybe later.
5907         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5908                              DAG.getConstant(ArgOffset, dl, PtrVT));
5909         SDValue Store =
5910             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5911         MemOpChains.push_back(Store);
5912         if (VR_idx != NumVRs) {
5913           SDValue Load =
5914               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5915           MemOpChains.push_back(Load.getValue(1));
5916           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5917         }
5918         ArgOffset += 16;
5919         for (unsigned i=0; i<16; i+=PtrByteSize) {
5920           if (GPR_idx == NumGPRs)
5921             break;
5922           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5923                                    DAG.getConstant(i, dl, PtrVT));
5924           SDValue Load =
5925               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5926           MemOpChains.push_back(Load.getValue(1));
5927           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5928         }
5929         break;
5930       }
5931 
5932       // Non-varargs Altivec params generally go in registers, but have
5933       // stack space allocated at the end.
5934       if (VR_idx != NumVRs) {
5935         // Doesn't have GPR space allocated.
5936         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5937       } else if (nAltivecParamsAtEnd==0) {
5938         // We are emitting Altivec params in order.
5939         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5940                          isPPC64, isTailCall, true, MemOpChains,
5941                          TailCallArguments, dl);
5942         ArgOffset += 16;
5943       }
5944       break;
5945     }
5946   }
5947   // If all Altivec parameters fit in registers, as they usually do,
5948   // they get stack space following the non-Altivec parameters.  We
5949   // don't track this here because nobody below needs it.
5950   // If there are more Altivec parameters than fit in registers, emit
5951   // the stores here.
5952   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
5953     unsigned j = 0;
5954     // Offset is aligned; skip the first 12 params, which go in V registers.
5955     ArgOffset = ((ArgOffset+15)/16)*16;
5956     ArgOffset += 12*16;
5957     for (unsigned i = 0; i != NumOps; ++i) {
5958       SDValue Arg = OutVals[i];
5959       EVT ArgType = Outs[i].VT;
5960       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
5961           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
5962         if (++j > NumVRs) {
5963           SDValue PtrOff;
5964           // We are emitting Altivec params in order.
5965           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5966                            isPPC64, isTailCall, true, MemOpChains,
5967                            TailCallArguments, dl);
5968           ArgOffset += 16;
5969         }
5970       }
5971     }
5972   }
5973 
5974   if (!MemOpChains.empty())
5975     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5976 
5977   // On Darwin, R12 must contain the address of an indirect callee.  This does
5978   // not mean the MTCTR instruction must use R12; it's easier to model this as
5979   // an extra parameter, so do that.
5980   if (!isTailCall &&
5981       !isFunctionGlobalAddress(Callee) &&
5982       !isa<ExternalSymbolSDNode>(Callee) &&
5983       !isBLACompatibleAddress(Callee, DAG))
5984     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
5985                                                    PPC::R12), Callee));
5986 
5987   // Build a sequence of copy-to-reg nodes chained together with token chain
5988   // and flag operands which copy the outgoing args into the appropriate regs.
5989   SDValue InFlag;
5990   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5991     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5992                              RegsToPass[i].second, InFlag);
5993     InFlag = Chain.getValue(1);
5994   }
5995 
5996   if (isTailCall)
5997     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5998                     TailCallArguments);
5999 
6000   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6001                     /* unused except on PPC64 ELFv1 */ false, DAG,
6002                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6003                     NumBytes, Ins, InVals, CS);
6004 }
6005 
6006 bool
6007 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6008                                   MachineFunction &MF, bool isVarArg,
6009                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
6010                                   LLVMContext &Context) const {
6011   SmallVector<CCValAssign, 16> RVLocs;
6012   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6013   return CCInfo.CheckReturn(Outs, RetCC_PPC);
6014 }
6015 
6016 SDValue
6017 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6018                                bool isVarArg,
6019                                const SmallVectorImpl<ISD::OutputArg> &Outs,
6020                                const SmallVectorImpl<SDValue> &OutVals,
6021                                const SDLoc &dl, SelectionDAG &DAG) const {
6022 
6023   SmallVector<CCValAssign, 16> RVLocs;
6024   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6025                  *DAG.getContext());
6026   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
6027 
6028   SDValue Flag;
6029   SmallVector<SDValue, 4> RetOps(1, Chain);
6030 
6031   // Copy the result values into the output registers.
6032   for (unsigned i = 0; i != RVLocs.size(); ++i) {
6033     CCValAssign &VA = RVLocs[i];
6034     assert(VA.isRegLoc() && "Can only return in registers!");
6035 
6036     SDValue Arg = OutVals[i];
6037 
6038     switch (VA.getLocInfo()) {
6039     default: llvm_unreachable("Unknown loc info!");
6040     case CCValAssign::Full: break;
6041     case CCValAssign::AExt:
6042       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6043       break;
6044     case CCValAssign::ZExt:
6045       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6046       break;
6047     case CCValAssign::SExt:
6048       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6049       break;
6050     }
6051 
6052     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6053     Flag = Chain.getValue(1);
6054     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6055   }
6056 
6057   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6058   const MCPhysReg *I =
6059     TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6060   if (I) {
6061     for (; *I; ++I) {
6062 
6063       if (PPC::G8RCRegClass.contains(*I))
6064         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6065       else if (PPC::F8RCRegClass.contains(*I))
6066         RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6067       else if (PPC::CRRCRegClass.contains(*I))
6068         RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6069       else if (PPC::VRRCRegClass.contains(*I))
6070         RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6071       else
6072         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6073     }
6074   }
6075 
6076   RetOps[0] = Chain;  // Update chain.
6077 
6078   // Add the flag if we have it.
6079   if (Flag.getNode())
6080     RetOps.push_back(Flag);
6081 
6082   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6083 }
6084 
6085 SDValue
6086 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6087                                                 SelectionDAG &DAG) const {
6088   SDLoc dl(Op);
6089 
6090   // Get the correct type for integers.
6091   EVT IntVT = Op.getValueType();
6092 
6093   // Get the inputs.
6094   SDValue Chain = Op.getOperand(0);
6095   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6096   // Build a DYNAREAOFFSET node.
6097   SDValue Ops[2] = {Chain, FPSIdx};
6098   SDVTList VTs = DAG.getVTList(IntVT);
6099   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6100 }
6101 
6102 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6103                                              SelectionDAG &DAG) const {
6104   // When we pop the dynamic allocation we need to restore the SP link.
6105   SDLoc dl(Op);
6106 
6107   // Get the correct type for pointers.
6108   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6109 
6110   // Construct the stack pointer operand.
6111   bool isPPC64 = Subtarget.isPPC64();
6112   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6113   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6114 
6115   // Get the operands for the STACKRESTORE.
6116   SDValue Chain = Op.getOperand(0);
6117   SDValue SaveSP = Op.getOperand(1);
6118 
6119   // Load the old link SP.
6120   SDValue LoadLinkSP =
6121       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6122 
6123   // Restore the stack pointer.
6124   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6125 
6126   // Store the old link SP.
6127   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6128 }
6129 
6130 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6131   MachineFunction &MF = DAG.getMachineFunction();
6132   bool isPPC64 = Subtarget.isPPC64();
6133   EVT PtrVT = getPointerTy(MF.getDataLayout());
6134 
6135   // Get the current return address save index.  This index is used primarily
6136   // when lowering RETURNADDR.
6137   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6138   int RASI = FI->getReturnAddrSaveIndex();
6139 
6140   // If the return address save index hasn't been defined yet.
6141   if (!RASI) {
6142     // Find out the fixed offset of the return address save area.
6143     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6144     // Allocate the frame index for the return address save area.
6145     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6146     // Save the result.
6147     FI->setReturnAddrSaveIndex(RASI);
6148   }
6149   return DAG.getFrameIndex(RASI, PtrVT);
6150 }
6151 
6152 SDValue
6153 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6154   MachineFunction &MF = DAG.getMachineFunction();
6155   bool isPPC64 = Subtarget.isPPC64();
6156   EVT PtrVT = getPointerTy(MF.getDataLayout());
6157 
6158   // Get current frame pointer save index.  The users of this index will be
6159   // primarily DYNALLOC instructions.
6160   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6161   int FPSI = FI->getFramePointerSaveIndex();
6162 
6163   // If the frame pointer save index hasn't been defined yet.
6164   if (!FPSI) {
6165     // Find out the fixed offset of the frame pointer save area.
6166     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6167     // Allocate the frame index for the frame pointer save area.
6168     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6169     // Save the result.
6170     FI->setFramePointerSaveIndex(FPSI);
6171   }
6172   return DAG.getFrameIndex(FPSI, PtrVT);
6173 }
6174 
6175 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6176                                                    SelectionDAG &DAG) const {
6177   // Get the inputs.
6178   SDValue Chain = Op.getOperand(0);
6179   SDValue Size  = Op.getOperand(1);
6180   SDLoc dl(Op);
6181 
6182   // Get the correct type for pointers.
6183   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6184   // Negate the size.
6185   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6186                                 DAG.getConstant(0, dl, PtrVT), Size);
6187   // Construct a node for the frame pointer save index.
6188   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6189   // Build a DYNALLOC node.
6190   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
6191   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
6192   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
6193 }
6194 
6195 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
6196                                                      SelectionDAG &DAG) const {
6197   MachineFunction &MF = DAG.getMachineFunction();
6198 
6199   bool isPPC64 = Subtarget.isPPC64();
6200   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6201 
6202   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
6203   return DAG.getFrameIndex(FI, PtrVT);
6204 }
6205 
6206 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
6207                                                SelectionDAG &DAG) const {
6208   SDLoc DL(Op);
6209   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
6210                      DAG.getVTList(MVT::i32, MVT::Other),
6211                      Op.getOperand(0), Op.getOperand(1));
6212 }
6213 
6214 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
6215                                                 SelectionDAG &DAG) const {
6216   SDLoc DL(Op);
6217   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
6218                      Op.getOperand(0), Op.getOperand(1));
6219 }
6220 
6221 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
6222   if (Op.getValueType().isVector())
6223     return LowerVectorLoad(Op, DAG);
6224 
6225   assert(Op.getValueType() == MVT::i1 &&
6226          "Custom lowering only for i1 loads");
6227 
6228   // First, load 8 bits into 32 bits, then truncate to 1 bit.
6229 
6230   SDLoc dl(Op);
6231   LoadSDNode *LD = cast<LoadSDNode>(Op);
6232 
6233   SDValue Chain = LD->getChain();
6234   SDValue BasePtr = LD->getBasePtr();
6235   MachineMemOperand *MMO = LD->getMemOperand();
6236 
6237   SDValue NewLD =
6238       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
6239                      BasePtr, MVT::i8, MMO);
6240   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
6241 
6242   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
6243   return DAG.getMergeValues(Ops, dl);
6244 }
6245 
6246 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
6247   if (Op.getOperand(1).getValueType().isVector())
6248     return LowerVectorStore(Op, DAG);
6249 
6250   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
6251          "Custom lowering only for i1 stores");
6252 
6253   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
6254 
6255   SDLoc dl(Op);
6256   StoreSDNode *ST = cast<StoreSDNode>(Op);
6257 
6258   SDValue Chain = ST->getChain();
6259   SDValue BasePtr = ST->getBasePtr();
6260   SDValue Value = ST->getValue();
6261   MachineMemOperand *MMO = ST->getMemOperand();
6262 
6263   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
6264                       Value);
6265   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
6266 }
6267 
6268 // FIXME: Remove this once the ANDI glue bug is fixed:
6269 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
6270   assert(Op.getValueType() == MVT::i1 &&
6271          "Custom lowering only for i1 results");
6272 
6273   SDLoc DL(Op);
6274   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
6275                      Op.getOperand(0));
6276 }
6277 
/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
/// when possible.
6280 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
6281   // Not FP? Not a fsel.
6282   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
6283       !Op.getOperand(2).getValueType().isFloatingPoint())
6284     return Op;
6285 
6286   // We might be able to do better than this under some circumstances, but in
6287   // general, fsel-based lowering of select is a finite-math-only optimization.
6288   // For more information, see section F.3 of the 2.06 ISA specification.
6289   if (!DAG.getTarget().Options.NoInfsFPMath ||
6290       !DAG.getTarget().Options.NoNaNsFPMath)
6291     return Op;
6292   // TODO: Propagate flags from the select rather than global settings.
6293   SDNodeFlags Flags;
6294   Flags.setNoInfs(true);
6295   Flags.setNoNaNs(true);
6296 
6297   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
6298 
6299   EVT ResVT = Op.getValueType();
6300   EVT CmpVT = Op.getOperand(0).getValueType();
6301   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6302   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
6303   SDLoc dl(Op);
6304 
6305   // If the RHS of the comparison is a 0.0, we don't need to do the
6306   // subtraction at all.
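  //
  // For example (illustrative walk-through), the SETEQ case below builds:
  //   Sel1 = fsel(LHS, TV, FV)      -- selects TV when LHS >= 0
  //   Res  = fsel(-LHS, Sel1, FV)   -- selects Sel1 when LHS <= 0
  // so Res is TV only when both hold, i.e. when LHS == 0.0.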
6307   SDValue Sel1;
6308   if (isFloatingPointZero(RHS))
6309     switch (CC) {
6310     default: break;       // SETUO etc aren't handled by fsel.
6311     case ISD::SETNE:
      std::swap(TV, FV);  // setne: swap TV/FV and fall through to seteq.
6313     case ISD::SETEQ:
6314       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6315         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6316       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6317       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
6318         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6319       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6320                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
6321     case ISD::SETULT:
6322     case ISD::SETLT:
6323       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
6324     case ISD::SETOGE:
6325     case ISD::SETGE:
6326       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6327         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6328       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6329     case ISD::SETUGT:
6330     case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
6332     case ISD::SETOLE:
6333     case ISD::SETLE:
6334       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6335         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6336       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6337                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
6338     }
6339 
6340   SDValue Cmp;
6341   switch (CC) {
6342   default: break;       // SETUO etc aren't handled by fsel.
6343   case ISD::SETNE:
    std::swap(TV, FV);  // setne: swap TV/FV and fall through to seteq.
6345   case ISD::SETEQ:
6346     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
6347     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6348       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6349     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6350     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
6351       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6352     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6353                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
6354   case ISD::SETULT:
6355   case ISD::SETLT:
6356     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
6357     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6358       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6359     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6360   case ISD::SETOGE:
6361   case ISD::SETGE:
6362     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
6363     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6364       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6365     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6366   case ISD::SETUGT:
6367   case ISD::SETGT:
6368     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
6369     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6370       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6371     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6372   case ISD::SETOLE:
6373   case ISD::SETLE:
6374     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
6375     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6376       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6377     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6378   }
6379   return Op;
6380 }
6381 
6382 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
6383                                                SelectionDAG &DAG,
6384                                                const SDLoc &dl) const {
6385   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6386   SDValue Src = Op.getOperand(0);
6387   if (Src.getValueType() == MVT::f32)
6388     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6389 
6390   SDValue Tmp;
6391   switch (Op.getSimpleValueType().SimpleTy) {
6392   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6393   case MVT::i32:
6394     Tmp = DAG.getNode(
6395         Op.getOpcode() == ISD::FP_TO_SINT
6396             ? PPCISD::FCTIWZ
6397             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6398         dl, MVT::f64, Src);
6399     break;
6400   case MVT::i64:
6401     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6402            "i64 FP_TO_UINT is supported only with FPCVT");
6403     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6404                                                         PPCISD::FCTIDUZ,
6405                       dl, MVT::f64, Src);
6406     break;
6407   }
6408 
6409   // Convert the FP value to an int value through memory.
6410   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
6411     (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
6412   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
6413   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
6414   MachinePointerInfo MPI =
6415       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6416 
6417   // Emit a store to the stack slot.
6418   SDValue Chain;
6419   if (i32Stack) {
6420     MachineFunction &MF = DAG.getMachineFunction();
6421     MachineMemOperand *MMO =
6422       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
6423     SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
6424     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
6425               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
6426   } else
6427     Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
6428 
6429   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
6430   // add in a bias on big endian.
6431   if (Op.getValueType() == MVT::i32 && !i32Stack) {
6432     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
6433                         DAG.getConstant(4, dl, FIPtr.getValueType()));
6434     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
6435   }
6436 
6437   RLI.Chain = Chain;
6438   RLI.Ptr = FIPtr;
6439   RLI.MPI = MPI;
6440 }
6441 
6442 /// \brief Custom lowers floating point to integer conversions to use
6443 /// the direct move instructions available in ISA 2.07 to avoid the
6444 /// need for load/store combinations.
6445 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
6446                                                     SelectionDAG &DAG,
6447                                                     const SDLoc &dl) const {
6448   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6449   SDValue Src = Op.getOperand(0);
6450 
6451   if (Src.getValueType() == MVT::f32)
6452     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6453 
6454   SDValue Tmp;
6455   switch (Op.getSimpleValueType().SimpleTy) {
6456   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6457   case MVT::i32:
6458     Tmp = DAG.getNode(
6459         Op.getOpcode() == ISD::FP_TO_SINT
6460             ? PPCISD::FCTIWZ
6461             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6462         dl, MVT::f64, Src);
6463     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
6464     break;
6465   case MVT::i64:
6466     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6467            "i64 FP_TO_UINT is supported only with FPCVT");
6468     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6469                                                         PPCISD::FCTIDUZ,
6470                       dl, MVT::f64, Src);
6471     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
6472     break;
6473   }
6474   return Tmp;
6475 }
6476 
6477 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
6478                                           const SDLoc &dl) const {
6479   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6480     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
6481 
6482   ReuseLoadInfo RLI;
6483   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6484 
6485   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6486                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6487 }
6488 
6489 // We're trying to insert a regular store, S, and then a load, L. If the
6490 // incoming value, O, is a load, we might just be able to have our load use the
6491 // address used by O. However, we don't know if anything else will store to
6492 // that address before we can load from it. To prevent this situation, we need
6493 // to insert our load, L, into the chain as a peer of O. To do this, we give L
6494 // the same chain operand as O, we create a token factor from the chain results
6495 // of O and L, and we replace all uses of O's chain result with that token
6496 // factor (see spliceIntoChain below for this last part).
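//
// For example (illustrative): if O = (load @p) with chain C, we create
// L = (load @p) also with chain C, build TF = TokenFactor(O.chain, L.chain),
// and replace all uses of O's chain result with TF; any store previously
// ordered after O is now ordered after L as well.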
6497 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
6498                                             ReuseLoadInfo &RLI,
6499                                             SelectionDAG &DAG,
6500                                             ISD::LoadExtType ET) const {
6501   SDLoc dl(Op);
6502   if (ET == ISD::NON_EXTLOAD &&
6503       (Op.getOpcode() == ISD::FP_TO_UINT ||
6504        Op.getOpcode() == ISD::FP_TO_SINT) &&
6505       isOperationLegalOrCustom(Op.getOpcode(),
6506                                Op.getOperand(0).getValueType())) {
6507 
6508     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6509     return true;
6510   }
6511 
6512   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
6513   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
6514       LD->isNonTemporal())
6515     return false;
6516   if (LD->getMemoryVT() != MemVT)
6517     return false;
6518 
6519   RLI.Ptr = LD->getBasePtr();
6520   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
6521     assert(LD->getAddressingMode() == ISD::PRE_INC &&
6522            "Non-pre-inc AM on PPC?");
6523     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
6524                           LD->getOffset());
6525   }
6526 
6527   RLI.Chain = LD->getChain();
6528   RLI.MPI = LD->getPointerInfo();
6529   RLI.IsDereferenceable = LD->isDereferenceable();
6530   RLI.IsInvariant = LD->isInvariant();
6531   RLI.Alignment = LD->getAlignment();
6532   RLI.AAInfo = LD->getAAInfo();
6533   RLI.Ranges = LD->getRanges();
6534 
6535   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
6536   return true;
6537 }
6538 
6539 // Given the head of the old chain, ResChain, insert a token factor containing
6540 // it and NewResChain, and make users of ResChain now be users of that token
6541 // factor.
6542 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
6543                                         SDValue NewResChain,
6544                                         SelectionDAG &DAG) const {
6545   if (!ResChain)
6546     return;
6547 
6548   SDLoc dl(NewResChain);
6549 
6550   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
6551                            NewResChain, DAG.getUNDEF(MVT::Other));
6552   assert(TF.getNode() != NewResChain.getNode() &&
6553          "A new TF really is required here");
6554 
6555   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
6556   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
6557 }
6558 
/// \brief Analyze the profitability of a direct move.
/// Prefer a floating-point load over an integer load plus a direct move
/// when the loaded integer value is used only by int-to-fp conversions.
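///
/// For example (illustrative): for
///   float f = (float)*ip;
/// where the loaded i32 feeds only the conversion, a floating-point load
/// (LFIWAX/LFIWZX) followed by FCFID* avoids an integer load plus a
/// GPR-to-VSR direct move.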
6562 static bool directMoveIsProfitable(const SDValue &Op) {
6563   SDNode *Origin = Op.getOperand(0).getNode();
6564   if (Origin->getOpcode() != ISD::LOAD)
6565     return true;
6566 
6567   for (SDNode::use_iterator UI = Origin->use_begin(),
6568                             UE = Origin->use_end();
6569        UI != UE; ++UI) {
6570 
6571     // Only look at the users of the loaded value.
6572     if (UI.getUse().get().getResNo() != 0)
6573       continue;
6574 
6575     if (UI->getOpcode() != ISD::SINT_TO_FP &&
6576         UI->getOpcode() != ISD::UINT_TO_FP)
6577       return true;
6578   }
6579 
6580   return false;
6581 }
6582 
6583 /// \brief Custom lowers integer to floating point conversions to use
6584 /// the direct move instructions available in ISA 2.07 to avoid the
6585 /// need for load/store combinations.
6586 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
6587                                                     SelectionDAG &DAG,
6588                                                     const SDLoc &dl) const {
6589   assert((Op.getValueType() == MVT::f32 ||
6590           Op.getValueType() == MVT::f64) &&
6591          "Invalid floating point type as target of conversion");
6592   assert(Subtarget.hasFPCVT() &&
6593          "Int to FP conversions with direct moves require FPCVT");
6594   SDValue FP;
6595   SDValue Src = Op.getOperand(0);
6596   bool SinglePrec = Op.getValueType() == MVT::f32;
6597   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
6598   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
6599   unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6600                              (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6601 
6602   if (WordInt) {
6603     FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6604                      dl, MVT::f64, Src);
6605     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  } else {
6608     FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6609     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6610   }
6611 
6612   return FP;
6613 }
6614 
6615 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
6616                                           SelectionDAG &DAG) const {
6617   SDLoc dl(Op);
6618 
6619   if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6620     if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6621       return SDValue();
6622 
6623     SDValue Value = Op.getOperand(0);
6624     // The values are now known to be -1 (false) or 1 (true). To convert this
6625     // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
6626     // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
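    // (Check: V = -1 gives 0.5*(-1)+0.5 = 0.0; V = 1 gives 0.5*1+0.5 = 1.0.)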
6627     Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
6628 
6629     SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
6630 
6631     Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
6632 
6633     if (Op.getValueType() != MVT::v4f64)
6634       Value = DAG.getNode(ISD::FP_ROUND, dl,
6635                           Op.getValueType(), Value,
6636                           DAG.getIntPtrConstant(1, dl));
6637     return Value;
6638   }
6639 
6640   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
6641   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
6642     return SDValue();
6643 
6644   if (Op.getOperand(0).getValueType() == MVT::i1)
6645     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
6646                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
6647                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
6648 
  // If we have direct moves, we can do the entire conversion without the
  // store/load; however, without FPCVT we can't do most conversions.
6651   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
6652       Subtarget.isPPC64() && Subtarget.hasFPCVT())
6653     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
6654 
6655   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
6656          "UINT_TO_FP is supported only with FPCVT");
6657 
6658   // If we have FCFIDS, then use it when converting to single-precision.
6659   // Otherwise, convert to double-precision and then round.
6660   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6661                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
6662                                                             : PPCISD::FCFIDS)
6663                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
6664                                                             : PPCISD::FCFID);
6665   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6666                   ? MVT::f32
6667                   : MVT::f64;
6668 
6669   if (Op.getOperand(0).getValueType() == MVT::i64) {
6670     SDValue SINT = Op.getOperand(0);
6671     // When converting to single-precision, we actually need to convert
6672     // to double-precision first and then round to single-precision.
6673     // To avoid double-rounding effects during that operation, we have
6674     // to prepare the input operand.  Bits that might be truncated when
6675     // converting to double-precision are replaced by a bit that won't
6676     // be lost at this stage, but is below the single-precision rounding
6677     // position.
6678     //
6679     // However, if -enable-unsafe-fp-math is in effect, accept double
6680     // rounding to avoid the extra overhead.
6681     if (Op.getValueType() == MVT::f32 &&
6682         !Subtarget.hasFPCVT() &&
6683         !DAG.getTarget().Options.UnsafeFPMath) {
6684 
6685       // Twiddle input to make sure the low 11 bits are zero.  (If this
6686       // is the case, we are guaranteed the value will fit into the 53 bit
6687       // mantissa of an IEEE double-precision value without rounding.)
6688       // If any of those low 11 bits were not zero originally, make sure
6689       // bit 12 (value 2048) is set instead, so that the final rounding
6690       // to single-precision gets the correct result.
6691       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6692                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
6693       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
6694                           Round, DAG.getConstant(2047, dl, MVT::i64));
6695       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
6696       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6697                           Round, DAG.getConstant(-2048, dl, MVT::i64));
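      // Illustrative example: if the low 11 bits of SINT are zero,
      // (SINT & 2047) + 2047 = 2047 leaves bit 11 clear, and the OR/AND
      // steps return SINT unchanged.  If any of those bits are set, the
      // addition carries into bit 11, so Round is SINT with the low 11
      // bits cleared and the 2048 bit set.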
6698 
6699       // However, we cannot use that value unconditionally: if the magnitude
6700       // of the input value is small, the bit-twiddling we did above might
6701       // end up visibly changing the output.  Fortunately, in that case, we
6702       // don't need to twiddle bits since the original input will convert
6703       // exactly to double-precision floating-point already.  Therefore,
6704       // construct a conditional to use the original value if the top 11
6705       // bits are all sign-bit copies, and use the rounded value computed
6706       // above otherwise.
6707       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
6708                                  SINT, DAG.getConstant(53, dl, MVT::i32));
6709       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
6710                          Cond, DAG.getConstant(1, dl, MVT::i64));
6711       Cond = DAG.getSetCC(dl, MVT::i32,
6712                           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
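      // (Illustrative check: when bits 63..53 are all copies of the sign
      // bit, SINT >> 53 is 0 or -1, adding 1 gives 0 or 1, and the unsigned
      // "> 1" test fails, so the original SINT is used.)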
6713 
6714       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
6715     }
6716 
6717     ReuseLoadInfo RLI;
6718     SDValue Bits;
6719 
6720     MachineFunction &MF = DAG.getMachineFunction();
6721     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
6722       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6723                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6724       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6725     } else if (Subtarget.hasLFIWAX() &&
6726                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
6727       MachineMemOperand *MMO =
6728         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6729                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6730       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6731       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
6732                                      DAG.getVTList(MVT::f64, MVT::Other),
6733                                      Ops, MVT::i32, MMO);
6734       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6735     } else if (Subtarget.hasFPCVT() &&
6736                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
6737       MachineMemOperand *MMO =
6738         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6739                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6740       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6741       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
6742                                      DAG.getVTList(MVT::f64, MVT::Other),
6743                                      Ops, MVT::i32, MMO);
6744       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6745     } else if (((Subtarget.hasLFIWAX() &&
6746                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
6747                 (Subtarget.hasFPCVT() &&
6748                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
6749                SINT.getOperand(0).getValueType() == MVT::i32) {
6750       MachineFrameInfo &MFI = MF.getFrameInfo();
6751       EVT PtrVT = getPointerTy(DAG.getDataLayout());
6752 
6753       int FrameIdx = MFI.CreateStackObject(4, 4, false);
6754       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6755 
6756       SDValue Store =
6757           DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
6758                        MachinePointerInfo::getFixedStack(
6759                            DAG.getMachineFunction(), FrameIdx));
6760 
6761       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6762              "Expected an i32 store");
6763 
6764       RLI.Ptr = FIdx;
6765       RLI.Chain = Store;
6766       RLI.MPI =
6767           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
6768       RLI.Alignment = 4;
6769 
6770       MachineMemOperand *MMO =
6771         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6772                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6773       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6774       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
6775                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
6776                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
6777                                      Ops, MVT::i32, MMO);
6778     } else
6779       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
6780 
6781     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
6782 
6783     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6784       FP = DAG.getNode(ISD::FP_ROUND, dl,
6785                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
6786     return FP;
6787   }
6788 
6789   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
6790          "Unhandled INT_TO_FP type in custom expander!");
6791   // Since we only generate this in 64-bit mode, we can take advantage of
6792   // 64-bit registers.  In particular, sign extend the input value into the
6793   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
6794   // then lfd it and fcfid it.
6795   MachineFunction &MF = DAG.getMachineFunction();
6796   MachineFrameInfo &MFI = MF.getFrameInfo();
6797   EVT PtrVT = getPointerTy(MF.getDataLayout());
6798 
6799   SDValue Ld;
6800   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
6801     ReuseLoadInfo RLI;
6802     bool ReusingLoad;
6803     if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
6804                                             DAG))) {
6805       int FrameIdx = MFI.CreateStackObject(4, 4, false);
6806       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6807 
6808       SDValue Store =
6809           DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
6810                        MachinePointerInfo::getFixedStack(
6811                            DAG.getMachineFunction(), FrameIdx));
6812 
6813       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6814              "Expected an i32 store");
6815 
6816       RLI.Ptr = FIdx;
6817       RLI.Chain = Store;
6818       RLI.MPI =
6819           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
6820       RLI.Alignment = 4;
6821     }
6822 
6823     MachineMemOperand *MMO =
6824       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6825                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6826     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6827     Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
6828                                    PPCISD::LFIWZX : PPCISD::LFIWAX,
6829                                  dl, DAG.getVTList(MVT::f64, MVT::Other),
6830                                  Ops, MVT::i32, MMO);
6831     if (ReusingLoad)
6832       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
6833   } else {
6834     assert(Subtarget.isPPC64() &&
6835            "i32->FP without LFIWAX supported only on PPC64");
6836 
6837     int FrameIdx = MFI.CreateStackObject(8, 8, false);
6838     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6839 
6840     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
6841                                 Op.getOperand(0));
6842 
6843     // STD the extended value into the stack slot.
6844     SDValue Store = DAG.getStore(
6845         DAG.getEntryNode(), dl, Ext64, FIdx,
6846         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
6847 
6848     // Load the value as a double.
6849     Ld = DAG.getLoad(
6850         MVT::f64, dl, Store, FIdx,
6851         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
6852   }
6853 
6854   // FCFID it and return it.
6855   SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
6856   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6857     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
6858                      DAG.getIntPtrConstant(0, dl));
6859   return FP;
6860 }
6861 
6862 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
6863                                             SelectionDAG &DAG) const {
6864   SDLoc dl(Op);
6865   /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
6867    settings:
6868      00 Round to nearest
6869      01 Round to 0
6870      10 Round to +inf
6871      11 Round to -inf
6872 
6873   FLT_ROUNDS, on the other hand, expects the following:
6874     -1 Undefined
6875      0 Round to 0
6876      1 Round to nearest
6877      2 Round to +inf
6878      3 Round to -inf
6879 
6880   To perform the conversion, we do:
6881     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
6882   */
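  // Verifying the formula against each RN encoding (illustrative):
  //   RN=00: 0 ^ ((~0 & 3) >> 1) = 0 ^ 1 = 1  (round to nearest)
  //   RN=01: 1 ^ ((~1 & 3) >> 1) = 1 ^ 1 = 0  (round to zero)
  //   RN=10: 2 ^ ((~2 & 3) >> 1) = 2 ^ 0 = 2  (round to +inf)
  //   RN=11: 3 ^ ((~3 & 3) >> 1) = 3 ^ 0 = 3  (round to -inf)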
6883 
6884   MachineFunction &MF = DAG.getMachineFunction();
6885   EVT VT = Op.getValueType();
6886   EVT PtrVT = getPointerTy(MF.getDataLayout());
6887 
6888   // Save FP Control Word to register
6889   EVT NodeTys[] = {
6890     MVT::f64,    // return register
6891     MVT::Glue    // unused in this context
6892   };
6893   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
6894 
6895   // Save FP register to stack slot
6896   int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
6897   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
6898   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
6899                                MachinePointerInfo());
6900 
6901   // Load FP Control Word from low 32 bits of stack slot.
6902   SDValue Four = DAG.getConstant(4, dl, PtrVT);
6903   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
6904   SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
6905 
6906   // Transform as necessary
6907   SDValue CWD1 =
6908     DAG.getNode(ISD::AND, dl, MVT::i32,
6909                 CWD, DAG.getConstant(3, dl, MVT::i32));
6910   SDValue CWD2 =
6911     DAG.getNode(ISD::SRL, dl, MVT::i32,
6912                 DAG.getNode(ISD::AND, dl, MVT::i32,
6913                             DAG.getNode(ISD::XOR, dl, MVT::i32,
6914                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
6915                             DAG.getConstant(3, dl, MVT::i32)),
6916                 DAG.getConstant(1, dl, MVT::i32));
6917 
6918   SDValue RetVal =
6919     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
6920 
6921   return DAG.getNode((VT.getSizeInBits() < 16 ?
6922                       ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
6923 }
6924 
6925 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6926   EVT VT = Op.getValueType();
6927   unsigned BitWidth = VT.getSizeInBits();
6928   SDLoc dl(Op);
6929   assert(Op.getNumOperands() == 3 &&
6930          VT == Op.getOperand(1).getValueType() &&
6931          "Unexpected SHL!");
6932 
6933   // Expand into a bunch of logical ops.  Note that these ops
6934   // depend on the PPC behavior for oversized shift amounts.
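  // (Illustrative, BitWidth = 32: for Amt = 40, both "Hi << Amt" and
  // "Lo >> (32 - Amt)" use out-of-range amounts and become 0, leaving
  // OutHi = Lo << (Amt - 32) = Lo << 8; for Amt = 8, the "Lo << (Amt - 32)"
  // term vanishes instead and OutHi = (Hi << 8) | (Lo >> 24).)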
6935   SDValue Lo = Op.getOperand(0);
6936   SDValue Hi = Op.getOperand(1);
6937   SDValue Amt = Op.getOperand(2);
6938   EVT AmtVT = Amt.getValueType();
6939 
6940   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6941                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6942   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
6943   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
6944   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
6945   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6946                              DAG.getConstant(-BitWidth, dl, AmtVT));
6947   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
6948   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6949   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
6950   SDValue OutOps[] = { OutLo, OutHi };
6951   return DAG.getMergeValues(OutOps, dl);
6952 }
6953 
6954 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6955   EVT VT = Op.getValueType();
6956   SDLoc dl(Op);
6957   unsigned BitWidth = VT.getSizeInBits();
6958   assert(Op.getNumOperands() == 3 &&
6959          VT == Op.getOperand(1).getValueType() &&
6960          "Unexpected SRL!");
6961 
6962   // Expand into a bunch of logical ops.  Note that these ops
6963   // depend on the PPC behavior for oversized shift amounts.
6964   SDValue Lo = Op.getOperand(0);
6965   SDValue Hi = Op.getOperand(1);
6966   SDValue Amt = Op.getOperand(2);
6967   EVT AmtVT = Amt.getValueType();
6968 
6969   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6970                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6971   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
6972   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
6973   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
6974   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6975                              DAG.getConstant(-BitWidth, dl, AmtVT));
6976   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
6977   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6978   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
6979   SDValue OutOps[] = { OutLo, OutHi };
6980   return DAG.getMergeValues(OutOps, dl);
6981 }
6982 
6983 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
6984   SDLoc dl(Op);
6985   EVT VT = Op.getValueType();
6986   unsigned BitWidth = VT.getSizeInBits();
6987   assert(Op.getNumOperands() == 3 &&
6988          VT == Op.getOperand(1).getValueType() &&
6989          "Unexpected SRA!");
6990 
6991   // Expand into a bunch of logical ops, followed by a select_cc.
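  // (Illustrative: the select_cc on Tmp5 = Amt - BitWidth picks
  // Tmp4 = (Lo >> Amt) | (Hi << (BitWidth - Amt)) when Amt <= BitWidth and
  // Tmp6 = Hi >>a (Amt - BitWidth) otherwise, so the sign bits propagate
  // into the low word for oversized shift amounts.)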
6992   SDValue Lo = Op.getOperand(0);
6993   SDValue Hi = Op.getOperand(1);
6994   SDValue Amt = Op.getOperand(2);
6995   EVT AmtVT = Amt.getValueType();
6996 
6997   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6998                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6999   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7000   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7001   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7002   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7003                              DAG.getConstant(-BitWidth, dl, AmtVT));
7004   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
7005   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
7006   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
7007                                   Tmp4, Tmp6, ISD::SETLE);
7008   SDValue OutOps[] = { OutLo, OutHi };
7009   return DAG.getMergeValues(OutOps, dl);
7010 }
7011 
7012 //===----------------------------------------------------------------------===//
7013 // Vector related lowering.
7014 //
7015 
7016 /// BuildSplatI - Build a canonical splati of Val with an element size of
7017 /// SplatSize.  Cast the result to VT.
7018 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
7019                            SelectionDAG &DAG, const SDLoc &dl) {
7020   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
7021 
7022   static const MVT VTys[] = { // canonical VT to use for each size.
7023     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
7024   };
7025 
7026   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
7027 
7028   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
7029   if (Val == -1)
7030     SplatSize = 1;
7031 
7032   EVT CanonicalVT = VTys[SplatSize-1];
7033 
7034   // Build a canonical splat for this value.
7035   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
7036 }
7037 
7038 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
7039 /// specified intrinsic ID.
7040 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
7041                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
7042   if (DestVT == MVT::Other) DestVT = Op.getValueType();
7043   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7044                      DAG.getConstant(IID, dl, MVT::i32), Op);
7045 }
7046 
7047 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
7048 /// specified intrinsic ID.
7049 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
7050                                 SelectionDAG &DAG, const SDLoc &dl,
7051                                 EVT DestVT = MVT::Other) {
7052   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
7053   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7054                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
7055 }
7056 
7057 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
7058 /// specified intrinsic ID.
7059 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
7060                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
7061                                 EVT DestVT = MVT::Other) {
7062   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
7063   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7064                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
7065 }
7066 
7067 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
7068 /// amount.  The result has the specified value type.
7069 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
7070                            SelectionDAG &DAG, const SDLoc &dl) {
7071   // Force LHS/RHS to be the right type.
7072   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
7073   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
7074 
7075   int Ops[16];
7076   for (unsigned i = 0; i != 16; ++i)
7077     Ops[i] = i + Amt;
7078   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
7079   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7080 }
7081 
7082 static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
7083   if (BVN->isConstant() || BVN->getValueType(0) != Type)
7084     return false;
7085   auto OpZero = BVN->getOperand(0);
7086   for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
7087     if (BVN->getOperand(i) != OpZero)
7088       return false;
7089   return true;
7090 }
7091 
7092 // If this is a case we can't handle, return null and let the default
7093 // expansion code take care of it.  If we CAN select this case, and if it
7094 // selects to a single instruction, return Op.  Otherwise, if we can codegen
7095 // this case more efficiently than a constant pool load, lower it to the
7096 // sequence of ops that should be used.
7097 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
7098                                              SelectionDAG &DAG) const {
7099   SDLoc dl(Op);
7100   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7101   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
7102 
7103   if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
7104     // We first build an i32 vector, load it into a QPX register,
7105     // then convert it to a floating-point vector and compare it
7106     // to a zero vector to get the boolean result.
7107     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7108     int FrameIdx = MFI.CreateStackObject(16, 16, false);
7109     MachinePointerInfo PtrInfo =
7110         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7111     EVT PtrVT = getPointerTy(DAG.getDataLayout());
7112     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7113 
7114     assert(BVN->getNumOperands() == 4 &&
7115       "BUILD_VECTOR for v4i1 does not have 4 operands");
7116 
7117     bool IsConst = true;
7118     for (unsigned i = 0; i < 4; ++i) {
7119       if (BVN->getOperand(i).isUndef()) continue;
7120       if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
7121         IsConst = false;
7122         break;
7123       }
7124     }
7125 
7126     if (IsConst) {
7127       Constant *One =
7128         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
7129       Constant *NegOne =
7130         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
7131 
7132       Constant *CV[4];
7133       for (unsigned i = 0; i < 4; ++i) {
7134         if (BVN->getOperand(i).isUndef())
7135           CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
7136         else if (isNullConstant(BVN->getOperand(i)))
7137           CV[i] = NegOne;
7138         else
7139           CV[i] = One;
7140       }
7141 
7142       Constant *CP = ConstantVector::get(CV);
7143       SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
7144                                           16 /* alignment */);
7145 
7146       SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
7147       SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
7148       return DAG.getMemIntrinsicNode(
7149           PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
7150           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7151     }
7152 
7153     SmallVector<SDValue, 4> Stores;
7154     for (unsigned i = 0; i < 4; ++i) {
7155       if (BVN->getOperand(i).isUndef()) continue;
7156 
7157       unsigned Offset = 4*i;
7158       SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7159       Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7160 
7161       unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
7162       if (StoreSize > 4) {
7163         Stores.push_back(
7164             DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
7165                               PtrInfo.getWithOffset(Offset), MVT::i32));
7166       } else {
7167         SDValue StoreValue = BVN->getOperand(i);
7168         if (StoreSize < 4)
7169           StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
7170 
7171         Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
7172                                       PtrInfo.getWithOffset(Offset)));
7173       }
7174     }
7175 
7176     SDValue StoreChain;
7177     if (!Stores.empty())
7178       StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7179     else
7180       StoreChain = DAG.getEntryNode();
7181 
7182     // Now load from v4i32 into the QPX register; this will extend it to
7183     // v4i64 but not yet convert it to a floating point. Nevertheless, this
7184     // is typed as v4f64 because the QPX register integer states are not
7185     // explicitly represented.
7186 
7187     SDValue Ops[] = {StoreChain,
7188                      DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
7189                      FIdx};
7190     SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
7191 
7192     SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
7193       dl, VTs, Ops, MVT::v4i32, PtrInfo);
7194     LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7195       DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
7196       LoadedVect);
7197 
7198     SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
7199 
7200     return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
7201   }
7202 
7203   // All other QPX vectors are handled by generic code.
7204   if (Subtarget.hasQPX())
7205     return SDValue();
7206 
7207   // Check if this is a splat of a constant value.
7208   APInt APSplatBits, APSplatUndef;
7209   unsigned SplatBitSize;
7210   bool HasAnyUndefs;
7211   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
7212                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
7213       SplatBitSize > 32) {
    // We can splat a non-constant value on CPUs that implement ISA 3.0
    // in two ways: LXVWSX (load and splat) and MTVSRWS (move and splat).
7216     auto OpZero = BVN->getOperand(0);
7217     bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
7218       BVN->isOnlyUserOf(OpZero.getNode());
7219     if (Subtarget.isISA3_0() && !CanLoadAndSplat &&
7220         (isNonConstSplatBV(BVN, MVT::v4i32) ||
7221          isNonConstSplatBV(BVN, MVT::v2i64)))
7222       return Op;
7223     return SDValue();
7224   }
7225 
7226   unsigned SplatBits = APSplatBits.getZExtValue();
7227   unsigned SplatUndef = APSplatUndef.getZExtValue();
7228   unsigned SplatSize = SplatBitSize / 8;
7229 
7230   // First, handle single instruction cases.
7231 
7232   // All zeros?
7233   if (SplatBits == 0) {
7234     // Canonicalize all zero vectors to be v4i32.
7235     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
7236       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
7237       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
7238     }
7239     return Op;
7240   }
7241 
7242   // We have XXSPLTIB for constant splats one byte wide
7243   if (Subtarget.isISA3_0() && Op.getValueType() == MVT::v16i8)
7244     return Op;
7245 
7246   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
7247   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
7248                     (32-SplatBitSize));
7249   if (SextVal >= -16 && SextVal <= 15)
7250     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
7251 
7252   // Two instruction sequences.
7253 
7254   // If this value is in the range [-32,30] and is even, use:
7255   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
7256   // If this value is in the range [17,31] and is odd, use:
7257   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
7258   // If this value is in the range [-31,-17] and is odd, use:
7259   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
7260   // Note the last two are three-instruction sequences.
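  // For example (illustrative): 30 = 15 + 15 uses vspltisw(15) + vspltisw(15);
  // 27 = 11 - (-16) uses vspltisw(11) - vspltisw(-16); and -27 = -11 + (-16)
  // uses vspltisw(-11) + vspltisw(-16).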
7261   if (SextVal >= -32 && SextVal <= 31) {
7262     // To avoid having these optimizations undone by constant folding,
7263     // we convert to a pseudo that will be expanded later into one of
7264     // the above forms.
7265     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
7266     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
7267               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
7268     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
7269     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
7270     if (VT == Op.getValueType())
7271       return RetVal;
7272     else
7273       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
7274   }
7275 
7276   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
7277   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
7278   // for fneg/fabs.
7279   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
7280     // Make -1 and vspltisw -1:
7281     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
7282 
7283     // Make the VSLW intrinsic, computing 0x8000_0000.
7284     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
7285                                    OnesV, DAG, dl);
7286 
7287     // xor by OnesV to invert it.
7288     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
7289     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7290   }
7291 
7292   // Check to see if this is a wide variety of vsplti*, binop self cases.
7293   static const signed char SplatCsts[] = {
7294     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
7295     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
7296   };
7297 
7298   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).
7301     int i = SplatCsts[idx];
7302 
7303     // Figure out what shift amount will be used by altivec if shifted by i in
7304     // this splat size.
7305     unsigned TypeShiftAmt = i & (SplatBitSize-1);
7306 
7307     // vsplti + shl self.
7308     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
7309       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7310       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7311         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
7312         Intrinsic::ppc_altivec_vslw
7313       };
7314       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7315       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7316     }
7317 
7318     // vsplti + srl self.
7319     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7320       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7321       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7322         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
7323         Intrinsic::ppc_altivec_vsrw
7324       };
7325       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7326       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7327     }
7328 
7329     // vsplti + sra self.
7330     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7331       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7332       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7333         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
7334         Intrinsic::ppc_altivec_vsraw
7335       };
7336       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7337       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7338     }
7339 
7340     // vsplti + rol self.
7341     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7342                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
7343       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7344       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7345         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
7346         Intrinsic::ppc_altivec_vrlw
7347       };
7348       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7349       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7350     }
7351 
7352     // t = vsplti c, result = vsldoi t, t, 1
7353     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7354       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7355       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7356       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7357     }
7358     // t = vsplti c, result = vsldoi t, t, 2
7359     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7360       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7361       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7362       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7363     }
7364     // t = vsplti c, result = vsldoi t, t, 3
7365     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
7366       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7367       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
7368       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7369     }
7370   }
7371 
7372   return SDValue();
7373 }
7374 
7375 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7376 /// the specified operations to build the shuffle.
7377 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7378                                       SDValue RHS, SelectionDAG &DAG,
7379                                       const SDLoc &dl) {
7380   unsigned OpNum = (PFEntry >> 26) & 0x0F;
7381   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7382   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
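  // The 13-bit IDs appear to pack the four mask elements as base-9 digits;
  // e.g. the LHS identity shuffle <0,1,2,3> encodes as ((0*9+1)*9+2)*9+3 and
  // the RHS identity <4,5,6,7> as ((4*9+5)*9+6)*9+7, matching the OP_COPY
  // checks below (illustrative reading of the table encoding).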
7383 
7384   enum {
7385     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7386     OP_VMRGHW,
7387     OP_VMRGLW,
7388     OP_VSPLTISW0,
7389     OP_VSPLTISW1,
7390     OP_VSPLTISW2,
7391     OP_VSPLTISW3,
7392     OP_VSLDOI4,
7393     OP_VSLDOI8,
7394     OP_VSLDOI12
7395   };
7396 
7397   if (OpNum == OP_COPY) {
7398     if (LHSID == (1*9+2)*9+3) return LHS;
7399     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7400     return RHS;
7401   }
7402 
7403   SDValue OpLHS, OpRHS;
7404   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7405   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7406 
7407   int ShufIdxs[16];
7408   switch (OpNum) {
7409   default: llvm_unreachable("Unknown i32 permute!");
7410   case OP_VMRGHW:
7411     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
7412     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
7413     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
7414     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
7415     break;
7416   case OP_VMRGLW:
7417     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
7418     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
7419     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
7420     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
7421     break;
7422   case OP_VSPLTISW0:
7423     for (unsigned i = 0; i != 16; ++i)
7424       ShufIdxs[i] = (i&3)+0;
7425     break;
7426   case OP_VSPLTISW1:
7427     for (unsigned i = 0; i != 16; ++i)
7428       ShufIdxs[i] = (i&3)+4;
7429     break;
7430   case OP_VSPLTISW2:
7431     for (unsigned i = 0; i != 16; ++i)
7432       ShufIdxs[i] = (i&3)+8;
7433     break;
7434   case OP_VSPLTISW3:
7435     for (unsigned i = 0; i != 16; ++i)
7436       ShufIdxs[i] = (i&3)+12;
7437     break;
7438   case OP_VSLDOI4:
7439     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
7440   case OP_VSLDOI8:
7441     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
7442   case OP_VSLDOI12:
7443     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
7444   }
7445   EVT VT = OpLHS.getValueType();
7446   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
7447   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
7448   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
7449   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7450 }
7451 
7452 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
7453 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
7454 /// return the code it can be lowered into.  Worst case, it can always be
7455 /// lowered into a vperm.
7456 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
7457                                                SelectionDAG &DAG) const {
7458   SDLoc dl(Op);
7459   SDValue V1 = Op.getOperand(0);
7460   SDValue V2 = Op.getOperand(1);
7461   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7462   EVT VT = Op.getValueType();
7463   bool isLittleEndian = Subtarget.isLittleEndian();
7464 
7465   unsigned ShiftElts, InsertAtByte;
7466   bool Swap;
7467   if (Subtarget.hasP9Vector() &&
7468       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
7469                            isLittleEndian)) {
7470     if (Swap)
7471       std::swap(V1, V2);
7472     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7473     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
7474     if (ShiftElts) {
7475       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
7476                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
7477       SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
7478                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
7479       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7480     }
7481     SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
7482                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
7483     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7484   }
7485 
7486   if (Subtarget.hasVSX()) {
7487     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
7488       int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
7489 
7490       // If the source for the shuffle is a scalar_to_vector that came from a
7491       // 32-bit load, it will have used LXVWSX so we don't need to splat again.
7492       if (Subtarget.isISA3_0() &&
7493           ((isLittleEndian && SplatIdx == 3) ||
7494            (!isLittleEndian && SplatIdx == 0))) {
7495         SDValue Src = V1.getOperand(0);
7496         if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
7497             Src.getOperand(0).getOpcode() == ISD::LOAD &&
7498             Src.getOperand(0).hasOneUse())
7499           return V1;
7500       }
7501       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7502       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
7503                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
7504       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
7505     }
7506 
7507     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
7508     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
7509       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
7510       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
7511       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
7512     }
7513 
7514   }
7515 
7516   if (Subtarget.hasQPX()) {
7517     if (VT.getVectorNumElements() != 4)
7518       return SDValue();
7519 
7520     if (V2.isUndef()) V2 = V1;
7521 
7522     int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
7523     if (AlignIdx != -1) {
7524       return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
7525                          DAG.getConstant(AlignIdx, dl, MVT::i32));
7526     } else if (SVOp->isSplat()) {
7527       int SplatIdx = SVOp->getSplatIndex();
7528       if (SplatIdx >= 4) {
7529         std::swap(V1, V2);
7530         SplatIdx -= 4;
7531       }
7532 
7533       return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
7534                          DAG.getConstant(SplatIdx, dl, MVT::i32));
7535     }
7536 
7537     // Lower this into a qvgpci/qvfperm pair.
7538 
7539     // Compute the qvgpci literal
7540     unsigned idx = 0;
7541     for (unsigned i = 0; i < 4; ++i) {
7542       int m = SVOp->getMaskElt(i);
7543       unsigned mm = m >= 0 ? (unsigned) m : i;
7544       idx |= mm << (3-i)*3;
7545     }
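         // idx now packs the four 3-bit selectors (each 0-7) into a 12-bit
         // literal, with element 0 occupying the most significant 3 bits.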
7546 
7547     SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
7548                              DAG.getConstant(idx, dl, MVT::i32));
7549     return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
7550   }
7551 
7552   // Cases that are handled by instructions that take permute immediates
7553   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
7554   // selected by the instruction selector.
7555   if (V2.isUndef()) {
7556     if (PPC::isSplatShuffleMask(SVOp, 1) ||
7557         PPC::isSplatShuffleMask(SVOp, 2) ||
7558         PPC::isSplatShuffleMask(SVOp, 4) ||
7559         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
7560         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
7561         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
7562         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
7563         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
7564         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
7565         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
7566         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
7567         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
7568         (Subtarget.hasP8Altivec() && (
7569          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
7570          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
7571          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
7572       return Op;
7573     }
7574   }
7575 
7576   // Altivec has a variety of "shuffle immediates" that take two vector inputs
7577   // and produce a fixed permutation.  If any of these match, do not lower to
7578   // VPERM.
7579   unsigned ShuffleKind = isLittleEndian ? 2 : 0;
7580   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7581       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7582       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
7583       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7584       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7585       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7586       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7587       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7588       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7589       (Subtarget.hasP8Altivec() && (
7590        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7591        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
7592        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
7593     return Op;
7594 
7595   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
7596   // perfect shuffle table to emit an optimal matching sequence.
7597   ArrayRef<int> PermMask = SVOp->getMask();
7598 
7599   unsigned PFIndexes[4];
7600   bool isFourElementShuffle = true;
7601   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
7602     unsigned EltNo = 8;   // Start out undef.
7603     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
7604       if (PermMask[i*4+j] < 0)
7605         continue;   // Undef, ignore it.
7606 
7607       unsigned ByteSource = PermMask[i*4+j];
7608       if ((ByteSource & 3) != j) {
7609         isFourElementShuffle = false;
7610         break;
7611       }
7612 
7613       if (EltNo == 8) {
7614         EltNo = ByteSource/4;
7615       } else if (EltNo != ByteSource/4) {
7616         isFourElementShuffle = false;
7617         break;
7618       }
7619     }
7620     PFIndexes[i] = EltNo;
7621   }
7622 
7623   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
7624   // perfect shuffle table to determine if it is cost-effective to do this as
7625   // discrete instructions, or whether we should use a vperm.
7626   // For now, we skip this for little endian until such time as we have a
7627   // little-endian perfect shuffle table.
7628   if (isFourElementShuffle && !isLittleEndian) {
7629     // Compute the index in the perfect shuffle table.
7630     unsigned PFTableIndex =
7631       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
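         // For example, the identity mask <0,1,2,3> gives PFIndexes = {0,1,2,3}
         // and a table index of 0*729 + 1*81 + 2*9 + 3 == 102 (each index is in
         // the range 0-8, where 8 means the element is undef).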
7632 
7633     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7634     unsigned Cost  = (PFEntry >> 30);
7635 
7636     // Determining when to avoid vperm is tricky.  Many things affect the cost
7637     // of vperm, particularly how many times the perm mask needs to be computed.
7638     // For example, if the perm mask can be hoisted out of a loop or is already
7639     // used (perhaps because there are multiple permutes with the same shuffle
7640     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
7641     // the loop requires an extra register.
7642     //
7643     // As a compromise, we only emit discrete instructions if the shuffle can be
7644     // generated in 3 or fewer operations.  When we have loop information
7645     // available, if this block is within a loop, we should avoid using vperm
7646     // for 3-operation perms and use a constant pool load instead.
7647     if (Cost < 3)
7648       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7649   }
7650 
7651   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
7652   // vector that will get spilled to the constant pool.
7653   if (V2.isUndef()) V2 = V1;
7654 
7655   // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
7656   // that it is in input element units, not in bytes.  Convert now.
7657 
7658   // For little endian, the order of the input vectors is reversed, and
7659   // the permutation mask is complemented with respect to 31.  This is
7660   // necessary to produce proper semantics with the big-endian-biased vperm
7661   // instruction.
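       // For example, with 4-byte elements, a result byte sourced from byte 2 of
       // element 1 (source byte 6) gets mask entry 6 on big-endian targets and
       // 31 - 6 == 25 on little-endian targets.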
7662   EVT EltVT = V1.getValueType().getVectorElementType();
7663   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
7664 
7665   SmallVector<SDValue, 16> ResultMask;
7666   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
7667     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
7668 
7669     for (unsigned j = 0; j != BytesPerElement; ++j)
7670       if (isLittleEndian)
7671         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
7672                                              dl, MVT::i32));
7673       else
7674         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
7675                                              MVT::i32));
7676   }
7677 
7678   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
7679   if (isLittleEndian)
7680     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7681                        V2, V1, VPermMask);
7682   else
7683     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7684                        V1, V2, VPermMask);
7685 }
7686 
7687 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
7688 /// vector comparison.  If it is, return true and fill in CompareOpc/isDot
7689 /// with information about the intrinsic.
7690 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
7691                                  bool &isDot, const PPCSubtarget &Subtarget) {
7692   unsigned IntrinsicID =
7693     cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
7694   CompareOpc = -1;
7695   isDot = false;
7696   switch (IntrinsicID) {
7697   default: return false;
7698     // Comparison predicates.
7699   case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
7700   case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
7701   case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
7702   case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
7703   case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
7704   case Intrinsic::ppc_altivec_vcmpequd_p:
7705     if (Subtarget.hasP8Altivec()) {
7706       CompareOpc = 199;
7707       isDot = 1;
7708     } else
7709       return false;
7710 
7711     break;
7712   case Intrinsic::ppc_altivec_vcmpneb_p:
7713   case Intrinsic::ppc_altivec_vcmpneh_p:
7714   case Intrinsic::ppc_altivec_vcmpnew_p:
7715   case Intrinsic::ppc_altivec_vcmpnezb_p:
7716   case Intrinsic::ppc_altivec_vcmpnezh_p:
7717   case Intrinsic::ppc_altivec_vcmpnezw_p:
7718     if (Subtarget.hasP9Altivec()) {
7719       switch(IntrinsicID) {
7720       default: llvm_unreachable("Unknown comparison intrinsic.");
7721       case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
7722       case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
7723       case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
7724       case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
7725       case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
7726       case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
7727       }
7728       isDot = 1;
7729     } else
7730       return false;
7731 
7732     break;
7733   case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
7734   case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
7735   case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
7736   case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
7737   case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
7738   case Intrinsic::ppc_altivec_vcmpgtsd_p:
7739     if (Subtarget.hasP8Altivec()) {
7740       CompareOpc = 967;
7741       isDot = 1;
7742     } else
7743       return false;
7744 
7745     break;
7746   case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
7747   case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
7748   case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
7749   case Intrinsic::ppc_altivec_vcmpgtud_p:
7750     if (Subtarget.hasP8Altivec()) {
7751       CompareOpc = 711;
7752       isDot = 1;
7753     } else
7754       return false;
7755 
7756     break;
7757     // VSX predicate comparisons use the same infrastructure
7758   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
7759   case Intrinsic::ppc_vsx_xvcmpgedp_p:
7760   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
7761   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
7762   case Intrinsic::ppc_vsx_xvcmpgesp_p:
7763   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
7764     if (Subtarget.hasVSX()) {
7765       switch (IntrinsicID) {
7766       case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
7767       case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
7768       case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
7769       case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
7770       case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
7771       case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
7772       }
7773       isDot = 1;
7774     }
7775     else
7776       return false;
7777 
7778     break;
7779 
7780     // Normal Comparisons.
7781   case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
7782   case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
7783   case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
7784   case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
7785   case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
7786   case Intrinsic::ppc_altivec_vcmpequd:
7787     if (Subtarget.hasP8Altivec()) {
7788       CompareOpc = 199;
7789       isDot = 0;
7790     } else
7791       return false;
7792 
7793     break;
7794   case Intrinsic::ppc_altivec_vcmpneb:
7795   case Intrinsic::ppc_altivec_vcmpneh:
7796   case Intrinsic::ppc_altivec_vcmpnew:
7797   case Intrinsic::ppc_altivec_vcmpnezb:
7798   case Intrinsic::ppc_altivec_vcmpnezh:
7799   case Intrinsic::ppc_altivec_vcmpnezw:
7800     if (Subtarget.hasP9Altivec()) {
7801       switch (IntrinsicID) {
7802       default: llvm_unreachable("Unknown comparison intrinsic.");
7803       case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
7804       case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
7805       case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
7806       case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
7807       case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
7808       case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
7809       }
7810       isDot = 0;
7811     } else
7812       return false;
7813     break;
7814   case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
7815   case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
7816   case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
7817   case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
7818   case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
7819   case Intrinsic::ppc_altivec_vcmpgtsd:
7820     if (Subtarget.hasP8Altivec()) {
7821       CompareOpc = 967;
7822       isDot = 0;
7823     } else
7824       return false;
7825 
7826     break;
7827   case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
7828   case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
7829   case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
7830   case Intrinsic::ppc_altivec_vcmpgtud:
7831     if (Subtarget.hasP8Altivec()) {
7832       CompareOpc = 711;
7833       isDot = 0;
7834     } else
7835       return false;
7836 
7837     break;
7838   }
7839   return true;
7840 }
7841 
7842 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
7843 /// lower, do it, otherwise return null.
7844 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
7845                                                    SelectionDAG &DAG) const {
7846   unsigned IntrinsicID =
7847     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7848 
7849   if (IntrinsicID == Intrinsic::thread_pointer) {
7850     // Reads the thread pointer register, used for __builtin_thread_pointer.
7851     bool is64bit = Subtarget.isPPC64();
7852     return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
7853                            is64bit ? MVT::i64 : MVT::i32);
7854   }
7855 
7856   // If this is a lowered altivec predicate compare, CompareOpc is set to the
7857   // opcode number of the comparison.
7858   SDLoc dl(Op);
7859   int CompareOpc;
7860   bool isDot;
7861   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
7862     return SDValue();    // Don't custom lower most intrinsics.
7863 
7864   // If this is a non-dot comparison, make the VCMP node and we are done.
7865   if (!isDot) {
7866     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
7867                               Op.getOperand(1), Op.getOperand(2),
7868                               DAG.getConstant(CompareOpc, dl, MVT::i32));
7869     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
7870   }
7871 
7872   // Create the PPCISD altivec 'dot' comparison node.
7873   SDValue Ops[] = {
7874     Op.getOperand(2),  // LHS
7875     Op.getOperand(3),  // RHS
7876     DAG.getConstant(CompareOpc, dl, MVT::i32)
7877   };
7878   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
7879   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
7880 
7881   // Now that we have the comparison, emit a copy from the CR to a GPR.
7882   // This is flagged to the above dot comparison.
7883   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
7884                                 DAG.getRegister(PPC::CR6, MVT::i32),
7885                                 CompNode.getValue(1));
7886 
7887   // Unpack the result based on how the target uses it.
7888   unsigned BitNo;   // Bit # of CR6.
7889   bool InvertBit;   // Invert result?
7890   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
7891   default:  // Can't happen, don't crash on invalid number though.
7892   case 0:   // Return the value of the EQ bit of CR6.
7893     BitNo = 0; InvertBit = false;
7894     break;
7895   case 1:   // Return the inverted value of the EQ bit of CR6.
7896     BitNo = 0; InvertBit = true;
7897     break;
7898   case 2:   // Return the value of the LT bit of CR6.
7899     BitNo = 2; InvertBit = false;
7900     break;
7901   case 3:   // Return the inverted value of the LT bit of CR6.
7902     BitNo = 2; InvertBit = true;
7903     break;
7904   }
7905 
7906   // Shift the bit into the low position.
7907   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
7908                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
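       // (In the MFOCRF result, CR6 occupies bits 7..4 counting from the LSB, so
       // the shift amount 8 - (3 - BitNo) == 5 + BitNo brings the EQ bit
       // (BitNo == 0) or the LT bit (BitNo == 2) down to bit 0.)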
7909   // Isolate the bit.
7910   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
7911                       DAG.getConstant(1, dl, MVT::i32));
7912 
7913   // If we are supposed to, toggle the bit.
7914   if (InvertBit)
7915     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
7916                         DAG.getConstant(1, dl, MVT::i32));
7917   return Flags;
7918 }
7919 
7920 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
7921                                                   SelectionDAG &DAG) const {
7922   SDLoc dl(Op);
7923   // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
7924   // instructions), but for smaller types, we need to first extend up to v2i32
7925   // before going any further.
7926   if (Op.getValueType() == MVT::v2i64) {
7927     EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
7928     if (ExtVT != MVT::v2i32) {
7929       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
7930       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
7931                        DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
7932                                         ExtVT.getVectorElementType(), 4)));
7933       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
7934       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
7935                        DAG.getValueType(MVT::v2i32));
7936     }
7937 
7938     return Op;
7939   }
7940 
7941   return SDValue();
7942 }
7943 
7944 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
7945                                                    SelectionDAG &DAG) const {
7946   SDLoc dl(Op);
7947   // Create a stack slot that is 16-byte aligned.
7948   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7949   int FrameIdx = MFI.CreateStackObject(16, 16, false);
7950   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7951   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7952 
7953   // Store the input value into Value#0 of the stack slot.
7954   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7955                                MachinePointerInfo());
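       // Loading the full vector type back out leaves every lane other than lane 0
       // holding whatever happened to be in the slot; that is fine because
       // SCALAR_TO_VECTOR leaves those lanes undefined anyway.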
7956   // Load it out.
7957   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
7958 }
7959 
7960 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
7961                                                   SelectionDAG &DAG) const {
7962   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
7963          "Should only be called for ISD::INSERT_VECTOR_ELT");
7964   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
7965   // We have legal lowering for constant indices but not for variable ones.
7966   if (C)
7967     return Op;
7968   return SDValue();
7969 }
7970 
7971 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
7972                                                    SelectionDAG &DAG) const {
7973   SDLoc dl(Op);
7974   SDNode *N = Op.getNode();
7975 
7976   assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
7977          "Unknown extract_vector_elt type");
7978 
7979   SDValue Value = N->getOperand(0);
7980 
7981   // The first part of this is like the store lowering except that we don't
7982   // need to track the chain.
7983 
7984   // The values are now known to be -1 (false) or 1 (true). To convert this
7985   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7986   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
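       // Concretely, (-1.0 + 1.0) * 0.5 == 0.0 and (1.0 + 1.0) * 0.5 == 1.0.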
7987   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7988 
7989   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
7990   // understand how to form the extending load.
7991   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
7992 
7993   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7994 
7995   // Now convert to an integer and store.
7996   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7997     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
7998     Value);
7999 
8000   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8001   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8002   MachinePointerInfo PtrInfo =
8003       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8004   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8005   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8006 
8007   SDValue StoreChain = DAG.getEntryNode();
8008   SDValue Ops[] = {StoreChain,
8009                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8010                    Value, FIdx};
8011   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8012 
8013   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8014     dl, VTs, Ops, MVT::v4i32, PtrInfo);
8015 
8016   // Extract the value requested.
8017   unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
8018   SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8019   Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8020 
8021   SDValue IntVal =
8022       DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
8023 
8024   if (!Subtarget.useCRBits())
8025     return IntVal;
8026 
8027   return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
8028 }
8029 
8030 /// Lowering for QPX v4i1 loads
8031 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
8032                                            SelectionDAG &DAG) const {
8033   SDLoc dl(Op);
8034   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
8035   SDValue LoadChain = LN->getChain();
8036   SDValue BasePtr = LN->getBasePtr();
8037 
8038   if (Op.getValueType() == MVT::v4f64 ||
8039       Op.getValueType() == MVT::v4f32) {
8040     EVT MemVT = LN->getMemoryVT();
8041     unsigned Alignment = LN->getAlignment();
8042 
8043     // If this load is properly aligned, then it is legal.
8044     if (Alignment >= MemVT.getStoreSize())
8045       return Op;
8046 
8047     EVT ScalarVT = Op.getValueType().getScalarType(),
8048         ScalarMemVT = MemVT.getScalarType();
8049     unsigned Stride = ScalarMemVT.getStoreSize();
8050 
8051     SDValue Vals[4], LoadChains[4];
8052     for (unsigned Idx = 0; Idx < 4; ++Idx) {
8053       SDValue Load;
8054       if (ScalarVT != ScalarMemVT)
8055         Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
8056                               BasePtr,
8057                               LN->getPointerInfo().getWithOffset(Idx * Stride),
8058                               ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8059                               LN->getMemOperand()->getFlags(), LN->getAAInfo());
8060       else
8061         Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
8062                            LN->getPointerInfo().getWithOffset(Idx * Stride),
8063                            MinAlign(Alignment, Idx * Stride),
8064                            LN->getMemOperand()->getFlags(), LN->getAAInfo());
8065 
8066       if (Idx == 0 && LN->isIndexed()) {
8067         assert(LN->getAddressingMode() == ISD::PRE_INC &&
8068                "Unknown addressing mode on vector load");
8069         Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
8070                                   LN->getAddressingMode());
8071       }
8072 
8073       Vals[Idx] = Load;
8074       LoadChains[Idx] = Load.getValue(1);
8075 
8076       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8077                             DAG.getConstant(Stride, dl,
8078                                             BasePtr.getValueType()));
8079     }
8080 
8081     SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8082     SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
8083 
8084     if (LN->isIndexed()) {
8085       SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
8086       return DAG.getMergeValues(RetOps, dl);
8087     }
8088 
8089     SDValue RetOps[] = { Value, TF };
8090     return DAG.getMergeValues(RetOps, dl);
8091   }
8092 
8093   assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
8094   assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
8095 
8096   // To lower v4i1 from a byte array, we load the byte elements of the
8097   // vector and then reuse the BUILD_VECTOR logic.
8098 
8099   SDValue VectElmts[4], VectElmtChains[4];
8100   for (unsigned i = 0; i < 4; ++i) {
8101     SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8102     Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8103 
8104     VectElmts[i] = DAG.getExtLoad(
8105         ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
8106         LN->getPointerInfo().getWithOffset(i), MVT::i8,
8107         /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
8108     VectElmtChains[i] = VectElmts[i].getValue(1);
8109   }
8110 
8111   LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
8112   SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
8113 
8114   SDValue RVals[] = { Value, LoadChain };
8115   return DAG.getMergeValues(RVals, dl);
8116 }
8117 
8118 /// Lowering for QPX v4i1 stores
8119 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
8120                                             SelectionDAG &DAG) const {
8121   SDLoc dl(Op);
8122   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
8123   SDValue StoreChain = SN->getChain();
8124   SDValue BasePtr = SN->getBasePtr();
8125   SDValue Value = SN->getValue();
8126 
8127   if (Value.getValueType() == MVT::v4f64 ||
8128       Value.getValueType() == MVT::v4f32) {
8129     EVT MemVT = SN->getMemoryVT();
8130     unsigned Alignment = SN->getAlignment();
8131 
8132     // If this store is properly aligned, then it is legal.
8133     if (Alignment >= MemVT.getStoreSize())
8134       return Op;
8135 
8136     EVT ScalarVT = Value.getValueType().getScalarType(),
8137         ScalarMemVT = MemVT.getScalarType();
8138     unsigned Stride = ScalarMemVT.getStoreSize();
8139 
8140     SDValue Stores[4];
8141     for (unsigned Idx = 0; Idx < 4; ++Idx) {
8142       SDValue Ex = DAG.getNode(
8143           ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
8144           DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
8145       SDValue Store;
8146       if (ScalarVT != ScalarMemVT)
8147         Store =
8148             DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
8149                               SN->getPointerInfo().getWithOffset(Idx * Stride),
8150                               ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8151                               SN->getMemOperand()->getFlags(), SN->getAAInfo());
8152       else
8153         Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
8154                              SN->getPointerInfo().getWithOffset(Idx * Stride),
8155                              MinAlign(Alignment, Idx * Stride),
8156                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
8157 
8158       if (Idx == 0 && SN->isIndexed()) {
8159         assert(SN->getAddressingMode() == ISD::PRE_INC &&
8160                "Unknown addressing mode on vector store");
8161         Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
8162                                     SN->getAddressingMode());
8163       }
8164 
8165       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8166                             DAG.getConstant(Stride, dl,
8167                                             BasePtr.getValueType()));
8168       Stores[Idx] = Store;
8169     }
8170 
8171     SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8172 
8173     if (SN->isIndexed()) {
8174       SDValue RetOps[] = { TF, Stores[0].getValue(1) };
8175       return DAG.getMergeValues(RetOps, dl);
8176     }
8177 
8178     return TF;
8179   }
8180 
8181   assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
8182   assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
8183 
8184   // The values are now known to be -1 (false) or 1 (true). To convert this
8185   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
8186   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
8187   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
8188 
8189   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
8190   // understand how to form the extending load.
8191   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
8192 
8193   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
8194 
8195   // Now convert to an integer and store.
8196   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8197     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
8198     Value);
8199 
8200   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8201   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8202   MachinePointerInfo PtrInfo =
8203       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8204   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8205   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8206 
8207   SDValue Ops[] = {StoreChain,
8208                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8209                    Value, FIdx};
8210   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8211 
8212   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8213     dl, VTs, Ops, MVT::v4i32, PtrInfo);
8214 
8215   // Move data into the byte array.
8216   SDValue Loads[4], LoadChains[4];
8217   for (unsigned i = 0; i < 4; ++i) {
8218     unsigned Offset = 4*i;
8219     SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8220     Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8221 
8222     Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
8223                            PtrInfo.getWithOffset(Offset));
8224     LoadChains[i] = Loads[i].getValue(1);
8225   }
8226 
8227   StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8228 
8229   SDValue Stores[4];
8230   for (unsigned i = 0; i < 4; ++i) {
8231     SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8232     Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8233 
8234     Stores[i] = DAG.getTruncStore(
8235         StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
8236         MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
8237         SN->getAAInfo());
8238   }
8239 
8240   StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8241 
8242   return StoreChain;
8243 }
8244 
8245 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
8246   SDLoc dl(Op);
8247   if (Op.getValueType() == MVT::v4i32) {
8248     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8249 
8250     SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
8251     SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.
8252 
8253     SDValue RHSSwap =   // = vrlw RHS, 16
8254       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
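         // A full 32-bit product can be assembled from 16-bit multiplies:
         //   (aH:aL) * (bH:bL) mod 2^32 == aL*bL + ((aH*bL + aL*bH) << 16)
         // vmulouh below supplies the aL*bL terms and vmsumuhm (fed the rotated
         // RHS) supplies the parenthesized cross terms.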
8255 
8256     // Shrinkify inputs to v8i16.
8257     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
8258     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
8259     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
8260 
8261     // Low parts multiplied together, generating 32-bit results (we ignore the
8262     // top parts).
8263     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
8264                                         LHS, RHS, DAG, dl, MVT::v4i32);
8265 
8266     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
8267                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
8268     // Shift the high parts up 16 bits.
8269     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
8270                               Neg16, DAG, dl);
8271     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
8272   } else if (Op.getValueType() == MVT::v8i16) {
8273     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8274 
8275     SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
8276 
8277     return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
8278                             LHS, RHS, Zero, DAG, dl);
8279   } else if (Op.getValueType() == MVT::v16i8) {
8280     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8281     bool isLittleEndian = Subtarget.isLittleEndian();
8282 
8283     // Multiply the even 8-bit parts, producing 16-bit sums.
8284     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
8285                                            LHS, RHS, DAG, dl, MVT::v8i16);
8286     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
8287 
8288     // Multiply the odd 8-bit parts, producing 16-bit sums.
8289     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
8290                                           LHS, RHS, DAG, dl, MVT::v8i16);
8291     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
8292 
8293     // Merge the results together.  Because vmuleub and vmuloub are
8294     // instructions with a big-endian bias, we must reverse the
8295     // element numbering and reverse the meaning of "odd" and "even"
8296     // when generating little endian code.
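         // For example, on big-endian targets result byte 0 comes from byte 1 of
         // EvenParts (the low half of the first even product) and result byte 1
         // from byte 1 of OddParts.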
8297     int Ops[16];
8298     for (unsigned i = 0; i != 8; ++i) {
8299       if (isLittleEndian) {
8300         Ops[i*2  ] = 2*i;
8301         Ops[i*2+1] = 2*i+16;
8302       } else {
8303         Ops[i*2  ] = 2*i+1;
8304         Ops[i*2+1] = 2*i+1+16;
8305       }
8306     }
8307     if (isLittleEndian)
8308       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
8309     else
8310       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
8311   } else {
8312     llvm_unreachable("Unknown mul to lower!");
8313   }
8314 }
8315 
8316 /// LowerOperation - Provide custom lowering hooks for some operations.
8317 ///
8318 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8319   switch (Op.getOpcode()) {
8320   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
8321   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
8322   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
8323   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
8324   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
8325   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
8326   case ISD::SETCC:              return LowerSETCC(Op, DAG);
8327   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
8328   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
8329   case ISD::VASTART:
8330     return LowerVASTART(Op, DAG);
8331 
8332   case ISD::VAARG:
8333     return LowerVAARG(Op, DAG);
8334 
8335   case ISD::VACOPY:
8336     return LowerVACOPY(Op, DAG);
8337 
8338   case ISD::STACKRESTORE:
8339     return LowerSTACKRESTORE(Op, DAG);
8340 
8341   case ISD::DYNAMIC_STACKALLOC:
8342     return LowerDYNAMIC_STACKALLOC(Op, DAG);
8343 
8344   case ISD::GET_DYNAMIC_AREA_OFFSET:
8345     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
8346 
8347   case ISD::EH_DWARF_CFA:
8348     return LowerEH_DWARF_CFA(Op, DAG);
8349 
8350   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
8351   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
8352 
8353   case ISD::LOAD:               return LowerLOAD(Op, DAG);
8354   case ISD::STORE:              return LowerSTORE(Op, DAG);
8355   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
8356   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
8357   case ISD::FP_TO_UINT:
8358   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
8359                                                       SDLoc(Op));
8360   case ISD::UINT_TO_FP:
8361   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
8362   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
8363 
8364   // Lower 64-bit shifts.
8365   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
8366   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
8367   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
8368 
8369   // Vector-related lowering.
8370   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
8371   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
8372   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
8373   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
8374   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
8375   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
8376   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
8377   case ISD::MUL:                return LowerMUL(Op, DAG);
8378 
8379   // For counter-based loop handling.
8380   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
8381 
8382   // Frame & Return address.
8383   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
8384   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
8385   }
8386 }
8387 
8388 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
8389                                            SmallVectorImpl<SDValue>&Results,
8390                                            SelectionDAG &DAG) const {
8391   SDLoc dl(N);
8392   switch (N->getOpcode()) {
8393   default:
8394     llvm_unreachable("Do not know how to custom type legalize this operation!");
8395   case ISD::READCYCLECOUNTER: {
8396     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
8397     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
8398 
8399     Results.push_back(RTB);
8400     Results.push_back(RTB.getValue(1));
8401     Results.push_back(RTB.getValue(2));
8402     break;
8403   }
8404   case ISD::INTRINSIC_W_CHAIN: {
8405     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
8406         Intrinsic::ppc_is_decremented_ctr_nonzero)
8407       break;
8408 
8409     assert(N->getValueType(0) == MVT::i1 &&
8410            "Unexpected result type for CTR decrement intrinsic");
8411     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
8412                                  N->getValueType(0));
8413     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
8414     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
8415                                  N->getOperand(1));
8416 
8417     Results.push_back(NewInt);
8418     Results.push_back(NewInt.getValue(1));
8419     break;
8420   }
8421   case ISD::VAARG: {
8422     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
8423       return;
8424 
8425     EVT VT = N->getValueType(0);
8426 
8427     if (VT == MVT::i64) {
8428       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
8429 
8430       Results.push_back(NewNode);
8431       Results.push_back(NewNode.getValue(1));
8432     }
8433     return;
8434   }
8435   case ISD::FP_ROUND_INREG: {
8436     assert(N->getValueType(0) == MVT::ppcf128);
8437     assert(N->getOperand(0).getValueType() == MVT::ppcf128);
8438     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
8439                              MVT::f64, N->getOperand(0),
8440                              DAG.getIntPtrConstant(0, dl));
8441     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
8442                              MVT::f64, N->getOperand(0),
8443                              DAG.getIntPtrConstant(1, dl));
8444 
8445     // Add the two halves of the long double in round-to-zero mode.
8446     SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8447 
8448     // We know the low half is about to be thrown away, so just use something
8449     // convenient.
8450     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
8451                                 FPreg, FPreg));
8452     return;
8453   }
8454   case ISD::FP_TO_SINT:
8455   case ISD::FP_TO_UINT:
8456     // LowerFP_TO_INT() can only handle f32 and f64.
8457     if (N->getOperand(0).getValueType() == MVT::ppcf128)
8458       return;
8459     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
8460     return;
8461   }
8462 }
8463 
8464 //===----------------------------------------------------------------------===//
8465 //  Other Lowering Code
8466 //===----------------------------------------------------------------------===//
8467 
8468 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
8469   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
8470   Function *Func = Intrinsic::getDeclaration(M, Id);
8471   return Builder.CreateCall(Func, {});
8472 }
8473 
8474 // The mappings for emitLeading/TrailingFence are taken from
8475 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
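     // In short: sequentially consistent operations get a leading full sync,
     // other release-or-stronger orderings get a leading lwsync, and
     // acquire-or-stronger loads get a trailing lwsync.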
8476 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
8477                                          AtomicOrdering Ord, bool IsStore,
8478                                          bool IsLoad) const {
8479   if (Ord == AtomicOrdering::SequentiallyConsistent)
8480     return callIntrinsic(Builder, Intrinsic::ppc_sync);
8481   if (isReleaseOrStronger(Ord))
8482     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8483   return nullptr;
8484 }
8485 
8486 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
8487                                           AtomicOrdering Ord, bool IsStore,
8488                                           bool IsLoad) const {
8489   if (IsLoad && isAcquireOrStronger(Ord))
8490     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8491   // FIXME: this is too conservative, a dependent branch + isync is enough.
8492   // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
8493   // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
8494   // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
8495   return nullptr;
8496 }
8497 
8498 MachineBasicBlock *
8499 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
8500                                     unsigned AtomicSize,
8501                                     unsigned BinOpcode,
8502                                     unsigned CmpOpcode,
8503                                     unsigned CmpPred) const {
8504   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8505   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8506 
8507   auto LoadMnemonic = PPC::LDARX;
8508   auto StoreMnemonic = PPC::STDCX;
8509   switch (AtomicSize) {
8510   default:
8511     llvm_unreachable("Unexpected size of atomic entity");
8512   case 1:
8513     LoadMnemonic = PPC::LBARX;
8514     StoreMnemonic = PPC::STBCX;
8515     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
8516     break;
8517   case 2:
8518     LoadMnemonic = PPC::LHARX;
8519     StoreMnemonic = PPC::STHCX;
8520     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
8521     break;
8522   case 4:
8523     LoadMnemonic = PPC::LWARX;
8524     StoreMnemonic = PPC::STWCX;
8525     break;
8526   case 8:
8527     LoadMnemonic = PPC::LDARX;
8528     StoreMnemonic = PPC::STDCX;
8529     break;
8530   }
8531 
8532   const BasicBlock *LLVM_BB = BB->getBasicBlock();
8533   MachineFunction *F = BB->getParent();
8534   MachineFunction::iterator It = ++BB->getIterator();
8535 
8536   unsigned dest = MI.getOperand(0).getReg();
8537   unsigned ptrA = MI.getOperand(1).getReg();
8538   unsigned ptrB = MI.getOperand(2).getReg();
8539   unsigned incr = MI.getOperand(3).getReg();
8540   DebugLoc dl = MI.getDebugLoc();
8541 
8542   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8543   MachineBasicBlock *loop2MBB =
8544     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
8545   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8546   F->insert(It, loopMBB);
8547   if (CmpOpcode)
8548     F->insert(It, loop2MBB);
8549   F->insert(It, exitMBB);
8550   exitMBB->splice(exitMBB->begin(), BB,
8551                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
8552   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8553 
8554   MachineRegisterInfo &RegInfo = F->getRegInfo();
8555   unsigned TmpReg = (!BinOpcode) ? incr :
8556     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
8557                                            : &PPC::GPRCRegClass);
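       // With no binary opcode (ATOMIC_SWAP), the incoming value is stored back
       // unchanged, so incr is used directly and no scratch register is needed.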
8558 
8559   //  thisMBB:
8560   //   ...
8561   //   fallthrough --> loopMBB
8562   BB->addSuccessor(loopMBB);
8563 
8564   //  loopMBB:
8565   //   l[wd]arx dest, ptr
8566   //   add r0, dest, incr
8567   //   st[wd]cx. r0, ptr
8568   //   bne- loopMBB
8569   //   fallthrough --> exitMBB
8570 
8571   // For max/min...
8572   //  loopMBB:
8573   //   l[wd]arx dest, ptr
8574   //   cmpl?[wd] incr, dest
8575   //   bgt exitMBB
8576   //  loop2MBB:
8577   //   st[wd]cx. dest, ptr
8578   //   bne- loopMBB
8579   //   fallthrough --> exitMBB
8580 
8581   BB = loopMBB;
8582   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
8583     .addReg(ptrA).addReg(ptrB);
8584   if (BinOpcode)
8585     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
8586   if (CmpOpcode) {
8587     // Signed comparisons of byte or halfword values must be sign-extended.
8588     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
8589       unsigned ExtReg =  RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
8590       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
8591               ExtReg).addReg(dest);
8592       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8593         .addReg(incr).addReg(ExtReg);
8594     } else
8595       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8596         .addReg(incr).addReg(dest);
8597 
8598     BuildMI(BB, dl, TII->get(PPC::BCC))
8599       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
8600     BB->addSuccessor(loop2MBB);
8601     BB->addSuccessor(exitMBB);
8602     BB = loop2MBB;
8603   }
8604   BuildMI(BB, dl, TII->get(StoreMnemonic))
8605     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
8606   BuildMI(BB, dl, TII->get(PPC::BCC))
8607     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
8608   BB->addSuccessor(loopMBB);
8609   BB->addSuccessor(exitMBB);
8610 
8611   //  exitMBB:
8612   //   ...
8613   BB = exitMBB;
8614   return BB;
8615 }
8616 
8617 MachineBasicBlock *
8618 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
8619                                             MachineBasicBlock *BB,
8620                                             bool is8bit, // true for 8-bit ops
8621                                             unsigned BinOpcode,
8622                                             unsigned CmpOpcode,
8623                                             unsigned CmpPred) const {
8624   // If we support part-word atomic mnemonics, just use them
8625   if (Subtarget.hasPartwordAtomics())
8626     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
8627                             CmpOpcode, CmpPred);
8628 
8629   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8630   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8631   // In 64-bit mode we have to use 64-bit registers for addresses, even though
8632   // lwarx/stwcx. only operate on 32-bit data.  With the 32-bit atomics we can
8633   // use address registers without caring whether they're 32 or 64, but here
8634   // we're doing actual arithmetic on the addresses.
8635   bool is64bit = Subtarget.isPPC64();
8636   bool isLittleEndian = Subtarget.isLittleEndian();
8637   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
8638 
8639   const BasicBlock *LLVM_BB = BB->getBasicBlock();
8640   MachineFunction *F = BB->getParent();
8641   MachineFunction::iterator It = ++BB->getIterator();
8642 
8643   unsigned dest = MI.getOperand(0).getReg();
8644   unsigned ptrA = MI.getOperand(1).getReg();
8645   unsigned ptrB = MI.getOperand(2).getReg();
8646   unsigned incr = MI.getOperand(3).getReg();
8647   DebugLoc dl = MI.getDebugLoc();
8648 
8649   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8650   MachineBasicBlock *loop2MBB =
8651     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
8652   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8653   F->insert(It, loopMBB);
8654   if (CmpOpcode)
8655     F->insert(It, loop2MBB);
8656   F->insert(It, exitMBB);
8657   exitMBB->splice(exitMBB->begin(), BB,
8658                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
8659   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8660 
8661   MachineRegisterInfo &RegInfo = F->getRegInfo();
8662   const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
8663                                           : &PPC::GPRCRegClass;
8664   unsigned PtrReg = RegInfo.createVirtualRegister(RC);
8665   unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
8666   unsigned ShiftReg =
8667     isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
8668   unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
8669   unsigned MaskReg = RegInfo.createVirtualRegister(RC);
8670   unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
8671   unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
8672   unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
8673   unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
8674   unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
8675   unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
8676   unsigned Ptr1Reg;
8677   unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
8678 
8679   //  thisMBB:
8680   //   ...
8681   //   fallthrough --> loopMBB
8682   BB->addSuccessor(loopMBB);
8683 
8684   // The 4-byte load must be aligned, while a char or short may be
8685   // anywhere in the word.  Hence all this nasty bookkeeping code.
8686   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
8687   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
8688   //   xori shift, shift1, 24 [16]
8689   //   rlwinm ptr, ptr1, 0, 0, 29
8690   //   slw incr2, incr, shift
8691   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
8692   //   slw mask, mask2, shift
8693   //  loopMBB:
8694   //   lwarx tmpDest, ptr
8695   //   add tmp, tmpDest, incr2
8696   //   andc tmp2, tmpDest, mask
8697   //   and tmp3, tmp, mask
8698   //   or tmp4, tmp3, tmp2
8699   //   stwcx. tmp4, ptr
8700   //   bne- loopMBB
8701   //   fallthrough --> exitMBB
8702   //   srw dest, tmpDest, shift
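       // Example (big-endian halfword): if (ptr1 & 3) == 2, then shift1 == 16 and
       // shift == 16 ^ 16 == 0, so incr2 and mask cover the low halfword (bytes
       // 2-3 of the big-endian word); if (ptr1 & 3) == 0, shift == 16 and they
       // cover bytes 0-1.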
8703   if (ptrA != ZeroReg) {
8704     Ptr1Reg = RegInfo.createVirtualRegister(RC);
8705     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
8706       .addReg(ptrA).addReg(ptrB);
8707   } else {
8708     Ptr1Reg = ptrB;
8709   }
8710   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
8711       .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
8712   if (!isLittleEndian)
8713     BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
8714         .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
8715   if (is64bit)
8716     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
8717       .addReg(Ptr1Reg).addImm(0).addImm(61);
8718   else
8719     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
8720       .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
8721   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
8722       .addReg(incr).addReg(ShiftReg);
8723   if (is8bit)
8724     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
8725   else {
8726     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
8727     BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
8728   }
8729   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
8730       .addReg(Mask2Reg).addReg(ShiftReg);
8731 
8732   BB = loopMBB;
8733   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
8734     .addReg(ZeroReg).addReg(PtrReg);
8735   if (BinOpcode)
8736     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
8737       .addReg(Incr2Reg).addReg(TmpDestReg);
8738   BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
8739     .addReg(TmpDestReg).addReg(MaskReg);
8740   BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
8741     .addReg(TmpReg).addReg(MaskReg);
8742   if (CmpOpcode) {
8743     // For unsigned comparisons, we can directly compare the shifted values.
8744     // For signed comparisons we shift and sign extend.
8745     unsigned SReg = RegInfo.createVirtualRegister(RC);
8746     BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
8747       .addReg(TmpDestReg).addReg(MaskReg);
8748     unsigned ValueReg = SReg;
8749     unsigned CmpReg = Incr2Reg;
8750     if (CmpOpcode == PPC::CMPW) {
8751       ValueReg = RegInfo.createVirtualRegister(RC);
8752       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
8753         .addReg(SReg).addReg(ShiftReg);
8754       unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
8755       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
8756         .addReg(ValueReg);
8757       ValueReg = ValueSReg;
8758       CmpReg = incr;
8759     }
8760     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8761       .addReg(CmpReg).addReg(ValueReg);
8762     BuildMI(BB, dl, TII->get(PPC::BCC))
8763       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
8764     BB->addSuccessor(loop2MBB);
8765     BB->addSuccessor(exitMBB);
8766     BB = loop2MBB;
8767   }
8768   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
8769     .addReg(Tmp3Reg).addReg(Tmp2Reg);
8770   BuildMI(BB, dl, TII->get(PPC::STWCX))
8771     .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
8772   BuildMI(BB, dl, TII->get(PPC::BCC))
8773     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
8774   BB->addSuccessor(loopMBB);
8775   BB->addSuccessor(exitMBB);
8776 
8777   //  exitMBB:
8778   //   ...
8779   BB = exitMBB;
8780   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
8781     .addReg(ShiftReg);
8782   return BB;
8783 }
8784 
8785 llvm::MachineBasicBlock *
8786 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
8787                                     MachineBasicBlock *MBB) const {
8788   DebugLoc DL = MI.getDebugLoc();
8789   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8790 
8791   MachineFunction *MF = MBB->getParent();
8792   MachineRegisterInfo &MRI = MF->getRegInfo();
8793 
8794   const BasicBlock *BB = MBB->getBasicBlock();
8795   MachineFunction::iterator I = ++MBB->getIterator();
8796 
8797   // Memory Reference
8798   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
8799   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
8800 
8801   unsigned DstReg = MI.getOperand(0).getReg();
8802   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
8803   assert(RC->hasType(MVT::i32) && "Invalid destination!");
8804   unsigned mainDstReg = MRI.createVirtualRegister(RC);
8805   unsigned restoreDstReg = MRI.createVirtualRegister(RC);
8806 
8807   MVT PVT = getPointerTy(MF->getDataLayout());
8808   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
8809          "Invalid Pointer Size!");
8810   // For v = setjmp(buf), we generate
8811   //
8812   // thisMBB:
8813   //  SjLjSetup mainMBB
8814   //  bl mainMBB
8815   //  v_restore = 1
8816   //  b sinkMBB
8817   //
8818   // mainMBB:
8819   //  buf[LabelOffset] = LR
8820   //  v_main = 0
8821   //
8822   // sinkMBB:
8823   //  v = phi(main, restore)
8824   //
8825 
8826   MachineBasicBlock *thisMBB = MBB;
8827   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
8828   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
8829   MF->insert(I, mainMBB);
8830   MF->insert(I, sinkMBB);
8831 
8832   MachineInstrBuilder MIB;
8833 
8834   // Transfer the remainder of BB and its successor edges to sinkMBB.
8835   sinkMBB->splice(sinkMBB->begin(), MBB,
8836                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
8837   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8838 
8839   // Note that the structure of the jmp_buf used here is not compatible
8840   // with that used by libc, and is not designed to be. Specifically, it
8841   // stores only those 'reserved' registers that LLVM does not otherwise
8842   // understand how to spill. Also, by convention, by the time this
8843   // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and the stack address in the third. Following the
8845   // X86 target code, we'll store the jump address in the second slot. We also
8846   // need to save the TOC pointer (R2) to handle jumps between shared
8847   // libraries, and that will be stored in the fourth slot. The thread
8848   // identifier (R13) is not affected.
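  //
  // In pointer-sized slots, the resulting layout is:
  //   slot 0: frame address (stored by the front end)
  //   slot 1: jump address (LR), stored below at LabelOffset
  //   slot 2: stack address (stored by the front end)
  //   slot 3: TOC pointer (R2), stored at TOCOffset
  //   slot 4: base pointer, stored at BPOffset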
8849 
8850   // thisMBB:
8851   const int64_t LabelOffset = 1 * PVT.getStoreSize();
8852   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
8853   const int64_t BPOffset    = 4 * PVT.getStoreSize();
8854 
  // Prepare the IP in a register.
8856   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
8857   unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
8858   unsigned BufReg = MI.getOperand(1).getReg();
8859 
8860   if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
8861     setUsesTOCBasePtr(*MBB->getParent());
8862     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
8863             .addReg(PPC::X2)
8864             .addImm(TOCOffset)
8865             .addReg(BufReg);
8866     MIB.setMemRefs(MMOBegin, MMOEnd);
8867   }
8868 
  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be deferred until PEI.
8871   unsigned BaseReg;
8872   if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
8873     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
8874   else
8875     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
8876 
8877   MIB = BuildMI(*thisMBB, MI, DL,
8878                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
8879             .addReg(BaseReg)
8880             .addImm(BPOffset)
8881             .addReg(BufReg);
8882   MIB.setMemRefs(MMOBegin, MMOEnd);
8883 
8884   // Setup
8885   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
8886   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
8887   MIB.addRegMask(TRI->getNoPreservedMask());
8888 
8889   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
8890 
8891   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
8892           .addMBB(mainMBB);
8893   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
8894 
8895   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
8896   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
8897 
8898   // mainMBB:
8899   //  mainDstReg = 0
8900   MIB =
8901       BuildMI(mainMBB, DL,
8902               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
8903 
8904   // Store IP
8905   if (Subtarget.isPPC64()) {
8906     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
8907             .addReg(LabelReg)
8908             .addImm(LabelOffset)
8909             .addReg(BufReg);
8910   } else {
8911     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
8912             .addReg(LabelReg)
8913             .addImm(LabelOffset)
8914             .addReg(BufReg);
8915   }
8916 
8917   MIB.setMemRefs(MMOBegin, MMOEnd);
8918 
8919   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
8920   mainMBB->addSuccessor(sinkMBB);
8921 
8922   // sinkMBB:
8923   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
8924           TII->get(PPC::PHI), DstReg)
8925     .addReg(mainDstReg).addMBB(mainMBB)
8926     .addReg(restoreDstReg).addMBB(thisMBB);
8927 
8928   MI.eraseFromParent();
8929   return sinkMBB;
8930 }
8931 
8932 MachineBasicBlock *
8933 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
8934                                      MachineBasicBlock *MBB) const {
8935   DebugLoc DL = MI.getDebugLoc();
8936   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8937 
8938   MachineFunction *MF = MBB->getParent();
8939   MachineRegisterInfo &MRI = MF->getRegInfo();
8940 
8941   // Memory Reference
8942   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
8943   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
8944 
8945   MVT PVT = getPointerTy(MF->getDataLayout());
8946   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
8947          "Invalid Pointer Size!");
8948 
8949   const TargetRegisterClass *RC =
8950     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
8951   unsigned Tmp = MRI.createVirtualRegister(RC);
8952   // Since FP is only updated here but NOT referenced, it's treated as GPR.
8953   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
8954   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
8955   unsigned BP =
8956       (PVT == MVT::i64)
8957           ? PPC::X30
8958           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
8959                                                               : PPC::R30);
8960 
8961   MachineInstrBuilder MIB;
8962 
8963   const int64_t LabelOffset = 1 * PVT.getStoreSize();
8964   const int64_t SPOffset    = 2 * PVT.getStoreSize();
8965   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
8966   const int64_t BPOffset    = 4 * PVT.getStoreSize();
8967 
8968   unsigned BufReg = MI.getOperand(0).getReg();
8969 
8970   // Reload FP (the jumped-to function may not have had a
8971   // frame pointer, and if so, then its r31 will be restored
8972   // as necessary).
8973   if (PVT == MVT::i64) {
8974     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
8975             .addImm(0)
8976             .addReg(BufReg);
8977   } else {
8978     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
8979             .addImm(0)
8980             .addReg(BufReg);
8981   }
8982   MIB.setMemRefs(MMOBegin, MMOEnd);
8983 
8984   // Reload IP
8985   if (PVT == MVT::i64) {
8986     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
8987             .addImm(LabelOffset)
8988             .addReg(BufReg);
8989   } else {
8990     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
8991             .addImm(LabelOffset)
8992             .addReg(BufReg);
8993   }
8994   MIB.setMemRefs(MMOBegin, MMOEnd);
8995 
8996   // Reload SP
8997   if (PVT == MVT::i64) {
8998     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
8999             .addImm(SPOffset)
9000             .addReg(BufReg);
9001   } else {
9002     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
9003             .addImm(SPOffset)
9004             .addReg(BufReg);
9005   }
9006   MIB.setMemRefs(MMOBegin, MMOEnd);
9007 
9008   // Reload BP
9009   if (PVT == MVT::i64) {
9010     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
9011             .addImm(BPOffset)
9012             .addReg(BufReg);
9013   } else {
9014     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
9015             .addImm(BPOffset)
9016             .addReg(BufReg);
9017   }
9018   MIB.setMemRefs(MMOBegin, MMOEnd);
9019 
9020   // Reload TOC
9021   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
9022     setUsesTOCBasePtr(*MBB->getParent());
9023     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
9024             .addImm(TOCOffset)
9025             .addReg(BufReg);
9026 
9027     MIB.setMemRefs(MMOBegin, MMOEnd);
9028   }
9029 
9030   // Jump
9031   BuildMI(*MBB, MI, DL,
9032           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
9033   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
9034 
9035   MI.eraseFromParent();
9036   return MBB;
9037 }
9038 
9039 MachineBasicBlock *
9040 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9041                                                MachineBasicBlock *BB) const {
9042   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
9043       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9044     if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
9045         MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9046       // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't, however, because there is no
9048       // way to mark the dependence as implicit there, and so the stackmap code
9049       // will confuse it with a regular operand. Instead, add the dependence
9050       // here.
9051       setUsesTOCBasePtr(*BB->getParent());
9052       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
9053     }
9054 
9055     return emitPatchPoint(MI, BB);
9056   }
9057 
9058   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
9059       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
9060     return emitEHSjLjSetJmp(MI, BB);
9061   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
9062              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
9063     return emitEHSjLjLongJmp(MI, BB);
9064   }
9065 
9066   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9067 
9068   // To "insert" these instructions we actually have to insert their
9069   // control-flow patterns.
9070   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9071   MachineFunction::iterator It = ++BB->getIterator();
9072 
9073   MachineFunction *F = BB->getParent();
9074 
9075   if (Subtarget.hasISEL() &&
9076       (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9077        MI.getOpcode() == PPC::SELECT_CC_I8 ||
9078        MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
9079     SmallVector<MachineOperand, 2> Cond;
9080     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9081         MI.getOpcode() == PPC::SELECT_CC_I8)
9082       Cond.push_back(MI.getOperand(4));
9083     else
9084       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
9085     Cond.push_back(MI.getOperand(1));
9086 
9087     DebugLoc dl = MI.getDebugLoc();
9088     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
9089                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
9090   } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9091              MI.getOpcode() == PPC::SELECT_CC_I8 ||
9092              MI.getOpcode() == PPC::SELECT_CC_F4 ||
9093              MI.getOpcode() == PPC::SELECT_CC_F8 ||
9094              MI.getOpcode() == PPC::SELECT_CC_QFRC ||
9095              MI.getOpcode() == PPC::SELECT_CC_QSRC ||
9096              MI.getOpcode() == PPC::SELECT_CC_QBRC ||
9097              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
9098              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
9099              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
9100              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
9101              MI.getOpcode() == PPC::SELECT_I4 ||
9102              MI.getOpcode() == PPC::SELECT_I8 ||
9103              MI.getOpcode() == PPC::SELECT_F4 ||
9104              MI.getOpcode() == PPC::SELECT_F8 ||
9105              MI.getOpcode() == PPC::SELECT_QFRC ||
9106              MI.getOpcode() == PPC::SELECT_QSRC ||
9107              MI.getOpcode() == PPC::SELECT_QBRC ||
9108              MI.getOpcode() == PPC::SELECT_VRRC ||
9109              MI.getOpcode() == PPC::SELECT_VSFRC ||
9110              MI.getOpcode() == PPC::SELECT_VSSRC ||
9111              MI.getOpcode() == PPC::SELECT_VSRC) {
9112     // The incoming instruction knows the destination vreg to set, the
9113     // condition code register to branch on, the true/false values to
9114     // select between, and a branch opcode to use.
9115 
9116     //  thisMBB:
9117     //  ...
9118     //   TrueVal = ...
9119     //   cmpTY ccX, r1, r2
9120     //   bCC copy1MBB
9121     //   fallthrough --> copy0MBB
9122     MachineBasicBlock *thisMBB = BB;
9123     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9124     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9125     DebugLoc dl = MI.getDebugLoc();
9126     F->insert(It, copy0MBB);
9127     F->insert(It, sinkMBB);
9128 
9129     // Transfer the remainder of BB and its successor edges to sinkMBB.
9130     sinkMBB->splice(sinkMBB->begin(), BB,
9131                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9132     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9133 
9134     // Next, add the true and fallthrough blocks as its successors.
9135     BB->addSuccessor(copy0MBB);
9136     BB->addSuccessor(sinkMBB);
9137 
9138     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
9139         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
9140         MI.getOpcode() == PPC::SELECT_QFRC ||
9141         MI.getOpcode() == PPC::SELECT_QSRC ||
9142         MI.getOpcode() == PPC::SELECT_QBRC ||
9143         MI.getOpcode() == PPC::SELECT_VRRC ||
9144         MI.getOpcode() == PPC::SELECT_VSFRC ||
9145         MI.getOpcode() == PPC::SELECT_VSSRC ||
9146         MI.getOpcode() == PPC::SELECT_VSRC) {
9147       BuildMI(BB, dl, TII->get(PPC::BC))
9148           .addReg(MI.getOperand(1).getReg())
9149           .addMBB(sinkMBB);
9150     } else {
9151       unsigned SelectPred = MI.getOperand(4).getImm();
9152       BuildMI(BB, dl, TII->get(PPC::BCC))
9153           .addImm(SelectPred)
9154           .addReg(MI.getOperand(1).getReg())
9155           .addMBB(sinkMBB);
9156     }
9157 
9158     //  copy0MBB:
9159     //   %FalseValue = ...
9160     //   # fallthrough to sinkMBB
9161     BB = copy0MBB;
9162 
9163     // Update machine-CFG edges
9164     BB->addSuccessor(sinkMBB);
9165 
9166     //  sinkMBB:
9167     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9168     //  ...
9169     BB = sinkMBB;
9170     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
9171         .addReg(MI.getOperand(3).getReg())
9172         .addMBB(copy0MBB)
9173         .addReg(MI.getOperand(2).getReg())
9174         .addMBB(thisMBB);
9175   } else if (MI.getOpcode() == PPC::ReadTB) {
9176     // To read the 64-bit time-base register on a 32-bit target, we read the
9177     // two halves. Should the counter have wrapped while it was being read, we
9178     // need to try again.
9179     // ...
9180     // readLoop:
9181     // mfspr Rx,TBU # load from TBU
9182     // mfspr Ry,TB  # load from TB
9183     // mfspr Rz,TBU # load from TBU
9184     // cmpw crX,Rx,Rz # check if 'old'='new'
9185     // bne readLoop   # branch if they're not equal
9186     // ...
9187 
9188     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
9189     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9190     DebugLoc dl = MI.getDebugLoc();
9191     F->insert(It, readMBB);
9192     F->insert(It, sinkMBB);
9193 
9194     // Transfer the remainder of BB and its successor edges to sinkMBB.
9195     sinkMBB->splice(sinkMBB->begin(), BB,
9196                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9197     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9198 
9199     BB->addSuccessor(readMBB);
9200     BB = readMBB;
9201 
9202     MachineRegisterInfo &RegInfo = F->getRegInfo();
9203     unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9204     unsigned LoReg = MI.getOperand(0).getReg();
9205     unsigned HiReg = MI.getOperand(1).getReg();
9206 
9207     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
9208     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
9209     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
9210 
9211     unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9212 
9213     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
9214       .addReg(HiReg).addReg(ReadAgainReg);
9215     BuildMI(BB, dl, TII->get(PPC::BCC))
9216       .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
9217 
9218     BB->addSuccessor(readMBB);
9219     BB->addSuccessor(sinkMBB);
9220   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9221     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9222   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9223     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9224   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9225     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9226   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9227     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9228 
9229   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9230     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9231   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9232     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9233   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9234     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9235   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9236     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9237 
9238   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9239     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9240   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9241     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9242   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9243     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9244   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9245     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9246 
9247   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9248     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9249   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9250     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9251   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9252     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9253   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9254     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9255 
9256   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9257     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9258   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9259     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9260   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9261     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9262   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9263     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9264 
9265   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9266     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9267   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9268     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9269   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9270     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9271   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9272     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9273 
9274   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
9275     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
9276   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
9277     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
9278   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
9279     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
9280   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
9281     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
9282 
9283   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
9284     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
9285   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
9286     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
9287   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
9288     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
9289   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
9290     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
9291 
9292   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
9293     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
9294   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
9295     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
9296   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
9297     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
9298   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
9299     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
9300 
9301   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
9302     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
9303   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
9304     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
9305   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
9306     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
9307   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
9308     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
9309 
9310   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9311     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9312   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9313     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9314   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9315     BB = EmitAtomicBinary(MI, BB, 4, 0);
9316   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9317     BB = EmitAtomicBinary(MI, BB, 8, 0);
9318 
9319   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9320            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9321            (Subtarget.hasPartwordAtomics() &&
9322             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9323            (Subtarget.hasPartwordAtomics() &&
9324             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9325     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
9326 
9327     auto LoadMnemonic = PPC::LDARX;
9328     auto StoreMnemonic = PPC::STDCX;
9329     switch (MI.getOpcode()) {
9330     default:
9331       llvm_unreachable("Compare and swap of unknown size");
9332     case PPC::ATOMIC_CMP_SWAP_I8:
9333       LoadMnemonic = PPC::LBARX;
9334       StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() &&
             "No support for partword atomics.");
9336       break;
9337     case PPC::ATOMIC_CMP_SWAP_I16:
9338       LoadMnemonic = PPC::LHARX;
9339       StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() &&
             "No support for partword atomics.");
9341       break;
9342     case PPC::ATOMIC_CMP_SWAP_I32:
9343       LoadMnemonic = PPC::LWARX;
9344       StoreMnemonic = PPC::STWCX;
9345       break;
9346     case PPC::ATOMIC_CMP_SWAP_I64:
9347       LoadMnemonic = PPC::LDARX;
9348       StoreMnemonic = PPC::STDCX;
9349       break;
9350     }
9351     unsigned dest = MI.getOperand(0).getReg();
9352     unsigned ptrA = MI.getOperand(1).getReg();
9353     unsigned ptrB = MI.getOperand(2).getReg();
9354     unsigned oldval = MI.getOperand(3).getReg();
9355     unsigned newval = MI.getOperand(4).getReg();
9356     DebugLoc dl = MI.getDebugLoc();
9357 
9358     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9359     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9360     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9361     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9362     F->insert(It, loop1MBB);
9363     F->insert(It, loop2MBB);
9364     F->insert(It, midMBB);
9365     F->insert(It, exitMBB);
9366     exitMBB->splice(exitMBB->begin(), BB,
9367                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9368     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9369 
9370     //  thisMBB:
9371     //   ...
9372     //   fallthrough --> loopMBB
9373     BB->addSuccessor(loop1MBB);
9374 
9375     // loop1MBB:
9376     //   l[bhwd]arx dest, ptr
9377     //   cmp[wd] dest, oldval
9378     //   bne- midMBB
9379     // loop2MBB:
9380     //   st[bhwd]cx. newval, ptr
    //   bne- loop1MBB
9382     //   b exitBB
9383     // midMBB:
9384     //   st[bhwd]cx. dest, ptr
9385     // exitBB:
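    //
    // (The st[bhwd]cx. in midMBB stores the just-loaded value back unchanged;
    // its purpose is to release the reservation taken by the l[bhwd]arx when
    // the comparison fails, so whether that store succeeds does not matter.)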
9386     BB = loop1MBB;
9387     BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9388       .addReg(ptrA).addReg(ptrB);
9389     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
9390       .addReg(oldval).addReg(dest);
9391     BuildMI(BB, dl, TII->get(PPC::BCC))
9392       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9393     BB->addSuccessor(loop2MBB);
9394     BB->addSuccessor(midMBB);
9395 
9396     BB = loop2MBB;
9397     BuildMI(BB, dl, TII->get(StoreMnemonic))
9398       .addReg(newval).addReg(ptrA).addReg(ptrB);
9399     BuildMI(BB, dl, TII->get(PPC::BCC))
9400       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9401     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9402     BB->addSuccessor(loop1MBB);
9403     BB->addSuccessor(exitMBB);
9404 
9405     BB = midMBB;
9406     BuildMI(BB, dl, TII->get(StoreMnemonic))
9407       .addReg(dest).addReg(ptrA).addReg(ptrB);
9408     BB->addSuccessor(exitMBB);
9409 
9410     //  exitMBB:
9411     //   ...
9412     BB = exitMBB;
9413   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
9414              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
9415     // We must use 64-bit registers for addresses when targeting 64-bit,
9416     // since we're actually doing arithmetic on them.  Other registers
9417     // can be 32-bit.
9418     bool is64bit = Subtarget.isPPC64();
9419     bool isLittleEndian = Subtarget.isLittleEndian();
9420     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
9421 
9422     unsigned dest = MI.getOperand(0).getReg();
9423     unsigned ptrA = MI.getOperand(1).getReg();
9424     unsigned ptrB = MI.getOperand(2).getReg();
9425     unsigned oldval = MI.getOperand(3).getReg();
9426     unsigned newval = MI.getOperand(4).getReg();
9427     DebugLoc dl = MI.getDebugLoc();
9428 
9429     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9430     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9431     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9432     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9433     F->insert(It, loop1MBB);
9434     F->insert(It, loop2MBB);
9435     F->insert(It, midMBB);
9436     F->insert(It, exitMBB);
9437     exitMBB->splice(exitMBB->begin(), BB,
9438                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9439     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9440 
9441     MachineRegisterInfo &RegInfo = F->getRegInfo();
9442     const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
9443                                             : &PPC::GPRCRegClass;
9444     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
9445     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
9446     unsigned ShiftReg =
9447       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
9448     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
9449     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
9450     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
9451     unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
9452     unsigned MaskReg = RegInfo.createVirtualRegister(RC);
9453     unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
9454     unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
9455     unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
9456     unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
9457     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
9458     unsigned Ptr1Reg;
9459     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
9460     unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
9461     //  thisMBB:
9462     //   ...
9463     //   fallthrough --> loopMBB
9464     BB->addSuccessor(loop1MBB);
9465 
9466     // The 4-byte load must be aligned, while a char or short may be
9467     // anywhere in the word.  Hence all this nasty bookkeeping code.
9468     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
9469     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
9470     //   xori shift, shift1, 24 [16]
9471     //   rlwinm ptr, ptr1, 0, 0, 29
9472     //   slw newval2, newval, shift
9473     //   slw oldval2, oldval,shift
9474     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
9475     //   slw mask, mask2, shift
9476     //   and newval3, newval2, mask
9477     //   and oldval3, oldval2, mask
9478     // loop1MBB:
9479     //   lwarx tmpDest, ptr
9480     //   and tmp, tmpDest, mask
9481     //   cmpw tmp, oldval3
9482     //   bne- midMBB
9483     // loop2MBB:
9484     //   andc tmp2, tmpDest, mask
9485     //   or tmp4, tmp2, newval3
9486     //   stwcx. tmp4, ptr
9487     //   bne- loop1MBB
9488     //   b exitBB
9489     // midMBB:
9490     //   stwcx. tmpDest, ptr
9491     // exitBB:
9492     //   srw dest, tmpDest, shift
9493     if (ptrA != ZeroReg) {
9494       Ptr1Reg = RegInfo.createVirtualRegister(RC);
9495       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
9496         .addReg(ptrA).addReg(ptrB);
9497     } else {
9498       Ptr1Reg = ptrB;
9499     }
9500     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
9501         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
9502     if (!isLittleEndian)
9503       BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
9504           .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
9505     if (is64bit)
9506       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
9507         .addReg(Ptr1Reg).addImm(0).addImm(61);
9508     else
9509       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
9510         .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
9511     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
9512         .addReg(newval).addReg(ShiftReg);
9513     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
9514         .addReg(oldval).addReg(ShiftReg);
9515     if (is8bit)
9516       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
9517     else {
9518       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
9519       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
9520         .addReg(Mask3Reg).addImm(65535);
9521     }
9522     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
9523         .addReg(Mask2Reg).addReg(ShiftReg);
9524     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
9525         .addReg(NewVal2Reg).addReg(MaskReg);
9526     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
9527         .addReg(OldVal2Reg).addReg(MaskReg);
9528 
9529     BB = loop1MBB;
9530     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
9531         .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
9533         .addReg(TmpDestReg).addReg(MaskReg);
9534     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
9535         .addReg(TmpReg).addReg(OldVal3Reg);
9536     BuildMI(BB, dl, TII->get(PPC::BCC))
9537         .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9538     BB->addSuccessor(loop2MBB);
9539     BB->addSuccessor(midMBB);
9540 
9541     BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
9545         .addReg(Tmp2Reg).addReg(NewVal3Reg);
9546     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
9547         .addReg(ZeroReg).addReg(PtrReg);
9548     BuildMI(BB, dl, TII->get(PPC::BCC))
9549       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9550     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9551     BB->addSuccessor(loop1MBB);
9552     BB->addSuccessor(exitMBB);
9553 
9554     BB = midMBB;
9555     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
9556       .addReg(ZeroReg).addReg(PtrReg);
9557     BB->addSuccessor(exitMBB);
9558 
9559     //  exitMBB:
9560     //   ...
9561     BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
9563       .addReg(ShiftReg);
9564   } else if (MI.getOpcode() == PPC::FADDrtz) {
9565     // This pseudo performs an FADD with rounding mode temporarily forced
9566     // to round-to-zero.  We emit this via custom inserter since the FPSCR
9567     // is not modeled at the SelectionDAG level.
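    // The RN (rounding mode) field occupies FPSCR bits 30-31; setting bit 31
    // and clearing bit 30 below selects RN = 0b01, i.e. round toward zero.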
9568     unsigned Dest = MI.getOperand(0).getReg();
9569     unsigned Src1 = MI.getOperand(1).getReg();
9570     unsigned Src2 = MI.getOperand(2).getReg();
9571     DebugLoc dl = MI.getDebugLoc();
9572 
9573     MachineRegisterInfo &RegInfo = F->getRegInfo();
9574     unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
9575 
9576     // Save FPSCR value.
9577     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
9578 
9579     // Set rounding mode to round-to-zero.
9580     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
9581     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
9582 
9583     // Perform addition.
9584     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
9585 
9586     // Restore FPSCR value.
9587     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
9588   } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9589              MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
9590              MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9591              MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
9592     unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9593                        MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
9594                           ? PPC::ANDIo8
9595                           : PPC::ANDIo;
9596     bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9597                  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
9598 
9599     MachineRegisterInfo &RegInfo = F->getRegInfo();
9600     unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
9601                                                   &PPC::GPRCRegClass :
9602                                                   &PPC::G8RCRegClass);
9603 
9604     DebugLoc dl = MI.getDebugLoc();
9605     BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
9606         .addReg(MI.getOperand(1).getReg())
9607         .addImm(1);
9608     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
9609             MI.getOperand(0).getReg())
9610         .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
9611   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
9612     DebugLoc Dl = MI.getDebugLoc();
9613     MachineRegisterInfo &RegInfo = F->getRegInfo();
9614     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9615     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
9616     return BB;
9617   } else {
9618     llvm_unreachable("Unexpected instr type to insert");
9619   }
9620 
9621   MI.eraseFromParent(); // The pseudo instruction is gone now.
9622   return BB;
9623 }
9624 
9625 //===----------------------------------------------------------------------===//
9626 // Target Optimization Hooks
9627 //===----------------------------------------------------------------------===//
9628 
9629 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
9630   // For the estimates, convergence is quadratic, so we essentially double the
9631   // number of digits correct after every iteration. For both FRE and FRSQRTE,
9632   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
9633   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
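  // For example, starting from 2^-5 the error after successive steps is
  // roughly 2^-10, 2^-20, 2^-40, so three steps suffice for f32 and four for
  // f64; starting from 2^-14, one step suffices for f32 and two for f64.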
9634   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
9635   if (VT.getScalarType() == MVT::f64)
9636     RefinementSteps++;
9637   return RefinementSteps;
9638 }
9639 
9640 SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
9641                                             int Enabled, int &RefinementSteps,
9642                                             bool &UseOneConstNR) const {
9643   EVT VT = Operand.getValueType();
9644   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
9645       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
9646       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9647       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9648       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9649       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9650     if (RefinementSteps == ReciprocalEstimate::Unspecified)
9651       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9652 
9653     UseOneConstNR = true;
9654     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
9655   }
9656   return SDValue();
9657 }
9658 
9659 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
9660                                             int Enabled,
9661                                             int &RefinementSteps) const {
9662   EVT VT = Operand.getValueType();
9663   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
9664       (VT == MVT::f64 && Subtarget.hasFRE()) ||
9665       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9666       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9667       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9668       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9669     if (RefinementSteps == ReciprocalEstimate::Unspecified)
9670       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9671     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
9672   }
9673   return SDValue();
9674 }
9675 
9676 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
9677   // Note: This functionality is used only when unsafe-fp-math is enabled, and
9678   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
9679   // enabled for division), this functionality is redundant with the default
9680   // combiner logic (once the division -> reciprocal/multiply transformation
9681   // has taken place). As a result, this matters more for older cores than for
9682   // newer ones.
9683 
  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
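  // For example, under unsafe-fp-math, x/d + y/d + z/d can be rewritten as
  // r = 1.0/d; x*r + y*r + z*r, trading the repeated divides for multiplies.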
9687   switch (Subtarget.getDarwinDirective()) {
9688   default:
9689     return 3;
9690   case PPC::DIR_440:
9691   case PPC::DIR_A2:
9692   case PPC::DIR_E500mc:
9693   case PPC::DIR_E5500:
9694     return 2;
9695   }
9696 }
9697 
9698 // isConsecutiveLSLoc needs to work even if all adds have not yet been
9699 // collapsed, and so we need to look through chains of them.
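// For example, for Loc = (add (add X, 16), 8) this accumulates Base = X and
// adds 24 to Offset.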
9700 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
9701                                      int64_t& Offset, SelectionDAG &DAG) {
9702   if (DAG.isBaseWithConstantOffset(Loc)) {
9703     Base = Loc.getOperand(0);
9704     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
9705 
9706     // The base might itself be a base plus an offset, and if so, accumulate
9707     // that as well.
9708     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
9709   }
9710 }
9711 
9712 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
9713                             unsigned Bytes, int Dist,
9714                             SelectionDAG &DAG) {
9715   if (VT.getSizeInBits() / 8 != Bytes)
9716     return false;
9717 
9718   SDValue BaseLoc = Base->getBasePtr();
9719   if (Loc.getOpcode() == ISD::FrameIndex) {
9720     if (BaseLoc.getOpcode() != ISD::FrameIndex)
9721       return false;
9722     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9723     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
9724     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
9725     int FS  = MFI.getObjectSize(FI);
9726     int BFS = MFI.getObjectSize(BFI);
9727     if (FS != BFS || FS != (int)Bytes) return false;
9728     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
9729   }
9730 
9731   SDValue Base1 = Loc, Base2 = BaseLoc;
9732   int64_t Offset1 = 0, Offset2 = 0;
9733   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
9734   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
9735   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
9736     return true;
9737 
9738   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9739   const GlobalValue *GV1 = nullptr;
9740   const GlobalValue *GV2 = nullptr;
9741   Offset1 = 0;
9742   Offset2 = 0;
9743   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
9744   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
9745   if (isGA1 && isGA2 && GV1 == GV2)
9746     return Offset1 == (Offset2 + Dist*Bytes);
9747   return false;
9748 }
9749 
9750 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
9751 // not enforce equality of the chain operands.
9752 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
9753                             unsigned Bytes, int Dist,
9754                             SelectionDAG &DAG) {
9755   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
9756     EVT VT = LS->getMemoryVT();
9757     SDValue Loc = LS->getBasePtr();
9758     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
9759   }
9760 
9761   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
9762     EVT VT;
9763     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9764     default: return false;
9765     case Intrinsic::ppc_qpx_qvlfd:
9766     case Intrinsic::ppc_qpx_qvlfda:
9767       VT = MVT::v4f64;
9768       break;
9769     case Intrinsic::ppc_qpx_qvlfs:
9770     case Intrinsic::ppc_qpx_qvlfsa:
9771       VT = MVT::v4f32;
9772       break;
9773     case Intrinsic::ppc_qpx_qvlfcd:
9774     case Intrinsic::ppc_qpx_qvlfcda:
9775       VT = MVT::v2f64;
9776       break;
9777     case Intrinsic::ppc_qpx_qvlfcs:
9778     case Intrinsic::ppc_qpx_qvlfcsa:
9779       VT = MVT::v2f32;
9780       break;
9781     case Intrinsic::ppc_qpx_qvlfiwa:
9782     case Intrinsic::ppc_qpx_qvlfiwz:
9783     case Intrinsic::ppc_altivec_lvx:
9784     case Intrinsic::ppc_altivec_lvxl:
9785     case Intrinsic::ppc_vsx_lxvw4x:
9786       VT = MVT::v4i32;
9787       break;
9788     case Intrinsic::ppc_vsx_lxvd2x:
9789       VT = MVT::v2f64;
9790       break;
9791     case Intrinsic::ppc_altivec_lvebx:
9792       VT = MVT::i8;
9793       break;
9794     case Intrinsic::ppc_altivec_lvehx:
9795       VT = MVT::i16;
9796       break;
9797     case Intrinsic::ppc_altivec_lvewx:
9798       VT = MVT::i32;
9799       break;
9800     }
9801 
9802     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
9803   }
9804 
9805   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
9806     EVT VT;
9807     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9808     default: return false;
9809     case Intrinsic::ppc_qpx_qvstfd:
9810     case Intrinsic::ppc_qpx_qvstfda:
9811       VT = MVT::v4f64;
9812       break;
9813     case Intrinsic::ppc_qpx_qvstfs:
9814     case Intrinsic::ppc_qpx_qvstfsa:
9815       VT = MVT::v4f32;
9816       break;
9817     case Intrinsic::ppc_qpx_qvstfcd:
9818     case Intrinsic::ppc_qpx_qvstfcda:
9819       VT = MVT::v2f64;
9820       break;
9821     case Intrinsic::ppc_qpx_qvstfcs:
9822     case Intrinsic::ppc_qpx_qvstfcsa:
9823       VT = MVT::v2f32;
9824       break;
9825     case Intrinsic::ppc_qpx_qvstfiw:
9826     case Intrinsic::ppc_qpx_qvstfiwa:
9827     case Intrinsic::ppc_altivec_stvx:
9828     case Intrinsic::ppc_altivec_stvxl:
9829     case Intrinsic::ppc_vsx_stxvw4x:
9830       VT = MVT::v4i32;
9831       break;
9832     case Intrinsic::ppc_vsx_stxvd2x:
9833       VT = MVT::v2f64;
9834       break;
9835     case Intrinsic::ppc_altivec_stvebx:
9836       VT = MVT::i8;
9837       break;
9838     case Intrinsic::ppc_altivec_stvehx:
9839       VT = MVT::i16;
9840       break;
9841     case Intrinsic::ppc_altivec_stvewx:
9842       VT = MVT::i32;
9843       break;
9844     }
9845 
9846     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
9847   }
9848 
9849   return false;
9850 }
9851 
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
9855 // indicates that it is safe to create a new consecutive load adjacent to the
9856 // load provided.
9857 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
9858   SDValue Chain = LD->getChain();
9859   EVT VT = LD->getMemoryVT();
9860 
9861   SmallSet<SDNode *, 16> LoadRoots;
9862   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
9863   SmallSet<SDNode *, 16> Visited;
9864 
9865   // First, search up the chain, branching to follow all token-factor operands.
9866   // If we find a consecutive load, then we're done, otherwise, record all
9867   // nodes just above the top-level loads and token factors.
9868   while (!Queue.empty()) {
9869     SDNode *ChainNext = Queue.pop_back_val();
9870     if (!Visited.insert(ChainNext).second)
9871       continue;
9872 
9873     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
9874       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
9875         return true;
9876 
9877       if (!Visited.count(ChainLD->getChain().getNode()))
9878         Queue.push_back(ChainLD->getChain().getNode());
9879     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
9880       for (const SDUse &O : ChainNext->ops())
9881         if (!Visited.count(O.getNode()))
9882           Queue.push_back(O.getNode());
9883     } else
9884       LoadRoots.insert(ChainNext);
9885   }
9886 
9887   // Second, search down the chain, starting from the top-level nodes recorded
9888   // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
9890   // all loads (just the chain uses) and token factors to find a consecutive
9891   // load.
9892   Visited.clear();
9893   Queue.clear();
9894 
9895   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
9896        IE = LoadRoots.end(); I != IE; ++I) {
9897     Queue.push_back(*I);
9898 
9899     while (!Queue.empty()) {
9900       SDNode *LoadRoot = Queue.pop_back_val();
9901       if (!Visited.insert(LoadRoot).second)
9902         continue;
9903 
9904       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
9905         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
9906           return true;
9907 
9908       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
9909            UE = LoadRoot->use_end(); UI != UE; ++UI)
9910         if (((isa<MemSDNode>(*UI) &&
9911             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
9912             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
9913           Queue.push_back(*UI);
9914     }
9915   }
9916 
9917   return false;
9918 }
9919 
9920 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
9921                                                   DAGCombinerInfo &DCI) const {
9922   SelectionDAG &DAG = DCI.DAG;
9923   SDLoc dl(N);
9924 
9925   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
9926   // If we're tracking CR bits, we need to be careful that we don't have:
9927   //   trunc(binary-ops(zext(x), zext(y)))
9928   // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
9930   // such that we're unnecessarily moving things into GPRs when it would be
9931   // better to keep them in CR bits.
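  //
  // For example, trunc(and(zext(x), zext(y))) with i1 x and y can be computed
  // directly as and(x, y) on the CR-bit values, avoiding the GPR round trip.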
9932 
9933   // Note that trunc here can be an actual i1 trunc, or can be the effective
9934   // truncation that comes from a setcc or select_cc.
9935   if (N->getOpcode() == ISD::TRUNCATE &&
9936       N->getValueType(0) != MVT::i1)
9937     return SDValue();
9938 
9939   if (N->getOperand(0).getValueType() != MVT::i32 &&
9940       N->getOperand(0).getValueType() != MVT::i64)
9941     return SDValue();
9942 
9943   if (N->getOpcode() == ISD::SETCC ||
9944       N->getOpcode() == ISD::SELECT_CC) {
9945     // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
9947     ISD::CondCode CC =
9948       cast<CondCodeSDNode>(N->getOperand(
9949         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
9950     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
9951 
9952     if (ISD::isSignedIntSetCC(CC)) {
9953       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
9954           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
9955         return SDValue();
9956     } else if (ISD::isUnsignedIntSetCC(CC)) {
9957       if (!DAG.MaskedValueIsZero(N->getOperand(0),
9958                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
9959           !DAG.MaskedValueIsZero(N->getOperand(1),
9960                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
9961         return SDValue();
9962     } else {
      // This is neither a signed nor an unsigned comparison; just make sure
9964       // that the high bits are equal.
9965       APInt Op1Zero, Op1One;
9966       APInt Op2Zero, Op2One;
9967       DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
9968       DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
9969 
9970       // We don't really care about what is known about the first bit (if
9971       // anything), so clear it in all masks prior to comparing them.
9972       Op1Zero.clearBit(0); Op1One.clearBit(0);
9973       Op2Zero.clearBit(0); Op2One.clearBit(0);
9974 
9975       if (Op1Zero != Op2Zero || Op1One != Op2One)
9976         return SDValue();
9977     }
9978   }
9979 
9980   // We now know that the higher-order bits are irrelevant, we just need to
9981   // make sure that all of the intermediate operations are bit operations, and
9982   // all inputs are extensions.
9983   if (N->getOperand(0).getOpcode() != ISD::AND &&
9984       N->getOperand(0).getOpcode() != ISD::OR  &&
9985       N->getOperand(0).getOpcode() != ISD::XOR &&
9986       N->getOperand(0).getOpcode() != ISD::SELECT &&
9987       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
9988       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
9989       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
9990       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
9991       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
9992     return SDValue();
9993 
9994   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
9995       N->getOperand(1).getOpcode() != ISD::AND &&
9996       N->getOperand(1).getOpcode() != ISD::OR  &&
9997       N->getOperand(1).getOpcode() != ISD::XOR &&
9998       N->getOperand(1).getOpcode() != ISD::SELECT &&
9999       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
10000       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
10001       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
10002       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
10003       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
10004     return SDValue();
10005 
10006   SmallVector<SDValue, 4> Inputs;
10007   SmallVector<SDValue, 8> BinOps, PromOps;
10008   SmallPtrSet<SDNode *, 16> Visited;
10009 
10010   for (unsigned i = 0; i < 2; ++i) {
10011     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10012           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10013           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10014           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10015         isa<ConstantSDNode>(N->getOperand(i)))
10016       Inputs.push_back(N->getOperand(i));
10017     else
10018       BinOps.push_back(N->getOperand(i));
10019 
10020     if (N->getOpcode() == ISD::TRUNCATE)
10021       break;
10022   }
10023 
10024   // Visit all inputs, collect all binary operations (and, or, xor and
10025   // select) that are all fed by extensions.
10026   while (!BinOps.empty()) {
10027     SDValue BinOp = BinOps.back();
10028     BinOps.pop_back();
10029 
10030     if (!Visited.insert(BinOp.getNode()).second)
10031       continue;
10032 
10033     PromOps.push_back(BinOp);
10034 
10035     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10036       // The condition of the select is not promoted.
10037       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10038         continue;
10039       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10040         continue;
10041 
10042       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10043             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10044             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10045            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10046           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10047         Inputs.push_back(BinOp.getOperand(i));
10048       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10049                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10050                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10051                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10052                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
10053                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10054                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10055                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10056                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
10057         BinOps.push_back(BinOp.getOperand(i));
10058       } else {
10059         // We have an input that is not an extension or another binary
10060         // operation; we'll abort this transformation.
10061         return SDValue();
10062       }
10063     }
10064   }
10065 
10066   // Make sure that this is a self-contained cluster of operations (which
10067   // is not quite the same thing as saying that everything has only one
10068   // use).
10069   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10070     if (isa<ConstantSDNode>(Inputs[i]))
10071       continue;
10072 
10073     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10074                               UE = Inputs[i].getNode()->use_end();
10075          UI != UE; ++UI) {
10076       SDNode *User = *UI;
10077       if (User != N && !Visited.count(User))
10078         return SDValue();
10079 
      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
10082       // FIXME: Although we could sometimes handle this, and it does occur in
10083       // practice that one of the condition inputs to the select is also one of
10084       // the outputs, we currently can't deal with this.
10085       if (User->getOpcode() == ISD::SELECT) {
10086         if (User->getOperand(0) == Inputs[i])
10087           return SDValue();
10088       } else if (User->getOpcode() == ISD::SELECT_CC) {
10089         if (User->getOperand(0) == Inputs[i] ||
10090             User->getOperand(1) == Inputs[i])
10091           return SDValue();
10092       }
10093     }
10094   }
10095 
10096   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10097     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10098                               UE = PromOps[i].getNode()->use_end();
10099          UI != UE; ++UI) {
10100       SDNode *User = *UI;
10101       if (User != N && !Visited.count(User))
10102         return SDValue();
10103 
      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
10106       // FIXME: Although we could sometimes handle this, and it does occur in
10107       // practice that one of the condition inputs to the select is also one of
10108       // the outputs, we currently can't deal with this.
10109       if (User->getOpcode() == ISD::SELECT) {
10110         if (User->getOperand(0) == PromOps[i])
10111           return SDValue();
10112       } else if (User->getOpcode() == ISD::SELECT_CC) {
10113         if (User->getOperand(0) == PromOps[i] ||
10114             User->getOperand(1) == PromOps[i])
10115           return SDValue();
10116       }
10117     }
10118   }
10119 
10120   // Replace all inputs with the extension operand.
10121   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10122     // Constants may have users outside the cluster of to-be-promoted nodes,
10123     // and so we need to replace those as we do the promotions.
10124     if (isa<ConstantSDNode>(Inputs[i]))
10125       continue;
10126     else
10127       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
10128   }
10129 
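  // Wrap the to-be-promoted nodes in HandleSDNodes so that the replacements
  // below cannot leave us with stale SDValues if nodes are CSE'd or RAUW'd
  // while we iterate.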
10130   std::list<HandleSDNode> PromOpHandles;
10131   for (auto &PromOp : PromOps)
10132     PromOpHandles.emplace_back(PromOp);
10133 
10134   // Replace all operations (these are all the same, but have a different
10135   // (i1) return type). DAG.getNode will validate that the types of
10136   // a binary operator match, so go through the list in reverse so that
10137   // we've likely promoted both operands first. Any intermediate truncations or
10138   // extensions disappear.
10139   while (!PromOpHandles.empty()) {
10140     SDValue PromOp = PromOpHandles.back().getValue();
10141     PromOpHandles.pop_back();
10142 
10143     if (PromOp.getOpcode() == ISD::TRUNCATE ||
10144         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
10145         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
10146         PromOp.getOpcode() == ISD::ANY_EXTEND) {
10147       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
10148           PromOp.getOperand(0).getValueType() != MVT::i1) {
10149         // The operand is not yet ready (see comment below).
10150         PromOpHandles.emplace_front(PromOp);
10151         continue;
10152       }
10153 
10154       SDValue RepValue = PromOp.getOperand(0);
10155       if (isa<ConstantSDNode>(RepValue))
10156         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
10157 
10158       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
10159       continue;
10160     }
10161 
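    // C is the index of the first value operand to promote: SELECT's value
    // operands start at 1 (operand 0 is the condition), and SELECT_CC's
    // start at 2 (operands 0 and 1 are the values being compared).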
10162     unsigned C;
10163     switch (PromOp.getOpcode()) {
10164     default:             C = 0; break;
10165     case ISD::SELECT:    C = 1; break;
10166     case ISD::SELECT_CC: C = 2; break;
10167     }
10168 
10169     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10170          PromOp.getOperand(C).getValueType() != MVT::i1) ||
10171         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10172          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
10173       // The to-be-promoted operands of this node have not yet been
10174       // promoted (this should be rare because we're going through the
10175       // list backward, but if one of the operands has several users in
10176       // this cluster of to-be-promoted nodes, it is possible).
10177       PromOpHandles.emplace_front(PromOp);
10178       continue;
10179     }
10180 
10181     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10182                                 PromOp.getNode()->op_end());
10183 
10184     // If there are any constant inputs, make sure they're replaced now.
10185     for (unsigned i = 0; i < 2; ++i)
10186       if (isa<ConstantSDNode>(Ops[C+i]))
10187         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
10188 
10189     DAG.ReplaceAllUsesOfValueWith(PromOp,
10190       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
10191   }
10192 
10193   // Now we're left with the initial truncation itself.
10194   if (N->getOpcode() == ISD::TRUNCATE)
10195     return N->getOperand(0);
10196 
10197   // Otherwise, this is a comparison. The operands to be compared have just
10198   // changed type (to i1), but everything else is the same.
10199   return SDValue(N, 0);
10200 }
10201 
10202 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
10203                                                   DAGCombinerInfo &DCI) const {
10204   SelectionDAG &DAG = DCI.DAG;
10205   SDLoc dl(N);
10206 
10207   // If we're tracking CR bits, we need to be careful that we don't have:
10208   //   zext(binary-ops(trunc(x), trunc(y)))
10209   // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
10211   // such that we're unnecessarily moving things into CR bits that can more
10212   // efficiently stay in GPRs. Note that if we're not certain that the high
10213   // bits are set as required by the final extension, we still may need to do
10214   // some masking to get the proper behavior.
10215 
10216   // This same functionality is important on PPC64 when dealing with
10217   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
10218   // the return values of functions. Because it is so similar, it is handled
10219   // here as well.
10220 
10221   if (N->getValueType(0) != MVT::i32 &&
10222       N->getValueType(0) != MVT::i64)
10223     return SDValue();
10224 
10225   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
10226         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
10227     return SDValue();
10228 
10229   if (N->getOperand(0).getOpcode() != ISD::AND &&
10230       N->getOperand(0).getOpcode() != ISD::OR  &&
10231       N->getOperand(0).getOpcode() != ISD::XOR &&
10232       N->getOperand(0).getOpcode() != ISD::SELECT &&
10233       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
10234     return SDValue();
10235 
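  // BinOps starts with the source of the extension; PromOps will collect the
  // operations to be rebuilt at the wider type, and Inputs the truncation
  // (or constant) leaves that feed them.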
10236   SmallVector<SDValue, 4> Inputs;
10237   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
10238   SmallPtrSet<SDNode *, 16> Visited;
10239 
10240   // Visit all inputs, collect all binary operations (and, or, xor and
10241   // select) that are all fed by truncations.
10242   while (!BinOps.empty()) {
10243     SDValue BinOp = BinOps.back();
10244     BinOps.pop_back();
10245 
10246     if (!Visited.insert(BinOp.getNode()).second)
10247       continue;
10248 
10249     PromOps.push_back(BinOp);
10250 
10251     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10252       // The condition of the select is not promoted.
10253       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10254         continue;
10255       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10256         continue;
10257 
10258       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10259           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10260         Inputs.push_back(BinOp.getOperand(i));
10261       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10262                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10263                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10264                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10265                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
10266         BinOps.push_back(BinOp.getOperand(i));
10267       } else {
10268         // We have an input that is not a truncation or another binary
10269         // operation; we'll abort this transformation.
10270         return SDValue();
10271       }
10272     }
10273   }
10274 
  // For each select (or select_cc), the condition/compare operands that must
  // be truncated back when the select is promoted, because those operands
  // are themselves part of the to-be-promoted set.
10277   DenseMap<SDNode *, EVT> SelectTruncOp[2];
10278 
10279   // Make sure that this is a self-contained cluster of operations (which
10280   // is not quite the same thing as saying that everything has only one
10281   // use).
10282   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10283     if (isa<ConstantSDNode>(Inputs[i]))
10284       continue;
10285 
10286     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10287                               UE = Inputs[i].getNode()->use_end();
10288          UI != UE; ++UI) {
10289       SDNode *User = *UI;
10290       if (User != N && !Visited.count(User))
10291         return SDValue();
10292 
      // If we're going to promote the non-output-value operand(s) of SELECT
      // or SELECT_CC, record them for truncation.
10295       if (User->getOpcode() == ISD::SELECT) {
10296         if (User->getOperand(0) == Inputs[i])
10297           SelectTruncOp[0].insert(std::make_pair(User,
10298                                     User->getOperand(0).getValueType()));
10299       } else if (User->getOpcode() == ISD::SELECT_CC) {
10300         if (User->getOperand(0) == Inputs[i])
10301           SelectTruncOp[0].insert(std::make_pair(User,
10302                                     User->getOperand(0).getValueType()));
10303         if (User->getOperand(1) == Inputs[i])
10304           SelectTruncOp[1].insert(std::make_pair(User,
10305                                     User->getOperand(1).getValueType()));
10306       }
10307     }
10308   }
10309 
10310   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10311     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10312                               UE = PromOps[i].getNode()->use_end();
10313          UI != UE; ++UI) {
10314       SDNode *User = *UI;
10315       if (User != N && !Visited.count(User))
10316         return SDValue();
10317 
      // If we're going to promote the non-output-value operand(s) of SELECT
      // or SELECT_CC, record them for truncation.
10320       if (User->getOpcode() == ISD::SELECT) {
10321         if (User->getOperand(0) == PromOps[i])
10322           SelectTruncOp[0].insert(std::make_pair(User,
10323                                     User->getOperand(0).getValueType()));
10324       } else if (User->getOpcode() == ISD::SELECT_CC) {
10325         if (User->getOperand(0) == PromOps[i])
10326           SelectTruncOp[0].insert(std::make_pair(User,
10327                                     User->getOperand(0).getValueType()));
10328         if (User->getOperand(1) == PromOps[i])
10329           SelectTruncOp[1].insert(std::make_pair(User,
10330                                     User->getOperand(1).getValueType()));
10331       }
10332     }
10333   }
10334 
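  // PromBits is the width of the value being extended; ReallyNeedsExt will
  // be set if the final sign/zero extension must still be performed.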
10335   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
10336   bool ReallyNeedsExt = false;
10337   if (N->getOpcode() != ISD::ANY_EXTEND) {
    // Unless every input is already sign/zero extended as required, we'll
    // still need to perform that extension at the end.
10340     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10341       if (isa<ConstantSDNode>(Inputs[i]))
10342         continue;
10343 
10344       unsigned OpBits =
10345         Inputs[i].getOperand(0).getValueSizeInBits();
10346       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
10347 
10348       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
10349            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
10350                                   APInt::getHighBitsSet(OpBits,
10351                                                         OpBits-PromBits))) ||
10352           (N->getOpcode() == ISD::SIGN_EXTEND &&
10353            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
10354              (OpBits-(PromBits-1)))) {
10355         ReallyNeedsExt = true;
10356         break;
10357       }
10358     }
10359   }
10360 
10361   // Replace all inputs, either with the truncation operand, or a
10362   // truncation or extension to the final output type.
10363   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs are handled when rebuilding the to-be-promoted nodes
    // that use them, because the constants might have users outside of the
    // cluster of promoted nodes.
10367     if (isa<ConstantSDNode>(Inputs[i]))
10368       continue;
10369 
10370     SDValue InSrc = Inputs[i].getOperand(0);
10371     if (Inputs[i].getValueType() == N->getValueType(0))
10372       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
10373     else if (N->getOpcode() == ISD::SIGN_EXTEND)
10374       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10375         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
10376     else if (N->getOpcode() == ISD::ZERO_EXTEND)
10377       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10378         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
10379     else
10380       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10381         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
10382   }
10383 
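  // As above, use HandleSDNodes so the worklist entries stay valid across
  // the RAUW calls below.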
10384   std::list<HandleSDNode> PromOpHandles;
10385   for (auto &PromOp : PromOps)
10386     PromOpHandles.emplace_back(PromOp);
10387 
10388   // Replace all operations (these are all the same, but have a different
10389   // (promoted) return type). DAG.getNode will validate that the types of
10390   // a binary operator match, so go through the list in reverse so that
10391   // we've likely promoted both operands first.
10392   while (!PromOpHandles.empty()) {
10393     SDValue PromOp = PromOpHandles.back().getValue();
10394     PromOpHandles.pop_back();
10395 
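    // As in DAGCombineTruncBoolExt, C is the index of the first value
    // operand to promote (1 for SELECT, 2 for SELECT_CC, 0 otherwise).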
10396     unsigned C;
10397     switch (PromOp.getOpcode()) {
10398     default:             C = 0; break;
10399     case ISD::SELECT:    C = 1; break;
10400     case ISD::SELECT_CC: C = 2; break;
10401     }
10402 
10403     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10404          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
10405         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10406          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
10407       // The to-be-promoted operands of this node have not yet been
10408       // promoted (this should be rare because we're going through the
10409       // list backward, but if one of the operands has several users in
10410       // this cluster of to-be-promoted nodes, it is possible).
10411       PromOpHandles.emplace_front(PromOp);
10412       continue;
10413     }
10414 
10415     // For SELECT and SELECT_CC nodes, we do a similar check for any
10416     // to-be-promoted comparison inputs.
10417     if (PromOp.getOpcode() == ISD::SELECT ||
10418         PromOp.getOpcode() == ISD::SELECT_CC) {
10419       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
10420            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
10421           (SelectTruncOp[1].count(PromOp.getNode()) &&
10422            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
10423         PromOpHandles.emplace_front(PromOp);
10424         continue;
10425       }
10426     }
10427 
10428     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10429                                 PromOp.getNode()->op_end());
10430 
10431     // If this node has constant inputs, then they'll need to be promoted here.
10432     for (unsigned i = 0; i < 2; ++i) {
10433       if (!isa<ConstantSDNode>(Ops[C+i]))
10434         continue;
10435       if (Ops[C+i].getValueType() == N->getValueType(0))
10436         continue;
10437 
10438       if (N->getOpcode() == ISD::SIGN_EXTEND)
10439         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10440       else if (N->getOpcode() == ISD::ZERO_EXTEND)
10441         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10442       else
10443         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10444     }
10445 
10446     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
10447     // truncate them again to the original value type.
10448     if (PromOp.getOpcode() == ISD::SELECT ||
10449         PromOp.getOpcode() == ISD::SELECT_CC) {
10450       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
10451       if (SI0 != SelectTruncOp[0].end())
10452         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
10453       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
10454       if (SI1 != SelectTruncOp[1].end())
10455         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
10456     }
10457 
10458     DAG.ReplaceAllUsesOfValueWith(PromOp,
10459       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
10460   }
10461 
10462   // Now we're left with the initial extension itself.
10463   if (!ReallyNeedsExt)
10464     return N->getOperand(0);
10465 
10466   // To zero extend, just mask off everything except for the first bit (in the
10467   // i1 case).
10468   if (N->getOpcode() == ISD::ZERO_EXTEND)
10469     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
10470                        DAG.getConstant(APInt::getLowBitsSet(
10471                                          N->getValueSizeInBits(0), PromBits),
10472                                        dl, N->getValueType(0)));
10473 
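  // To sign extend, shift left so that bit (PromBits - 1) becomes the sign
  // bit of the wider type, then arithmetic-shift right by the same amount.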
10474   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
10475          "Invalid extension type");
10476   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
10477   SDValue ShiftCst =
10478       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
10479   return DAG.getNode(
10480       ISD::SRA, dl, N->getValueType(0),
10481       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
10482       ShiftCst);
10483 }
10484 
10485 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
10486                                                  DAGCombinerInfo &DCI) const {
10487   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
10488          "Should be called with a BUILD_VECTOR node");
10489 
10490   SelectionDAG &DAG = DCI.DAG;
10491   SDLoc dl(N);
10492   if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX())
10493     return SDValue();
10494 
10495   // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
10497   if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP &&
10498       N->getOperand(0).getOpcode() != ISD::UINT_TO_FP)
10499     return SDValue();
10500   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
10501       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
10502     return SDValue();
10503   if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
10504     return SDValue();
10505 
10506   SDValue Ext1 = N->getOperand(0).getOperand(0);
10507   SDValue Ext2 = N->getOperand(1).getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
10510     return SDValue();
10511 
10512   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
10513   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
10514   if (!Ext1Op || !Ext2Op)
10515     return SDValue();
  if (Ext1.getValueType() != MVT::i32 ||
      Ext2.getValueType() != MVT::i32)
    return SDValue();
  if (Ext1.getOperand(0) != Ext2.getOperand(0))
    return SDValue();
10520 
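  // The two extracted lanes must form one aligned half of the source vector;
  // which half corresponds to subvector index 0 or 1 depends on endianness.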
10521   int FirstElem = Ext1Op->getZExtValue();
10522   int SecondElem = Ext2Op->getZExtValue();
10523   int SubvecIdx;
10524   if (FirstElem == 0 && SecondElem == 1)
10525     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
10526   else if (FirstElem == 2 && SecondElem == 3)
10527     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
10528   else
10529     return SDValue();
10530 
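  // Emit a single [SU]INT_VEC_TO_FP node that converts the selected half of
  // the source vector directly to v2f64.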
10531   SDValue SrcVec = Ext1.getOperand(0);
10532   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
10533     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
10534   return DAG.getNode(NodeType, dl, MVT::v2f64,
10535                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
10536 }
10537 
10538 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
10539                                               DAGCombinerInfo &DCI) const {
10540   assert((N->getOpcode() == ISD::SINT_TO_FP ||
10541           N->getOpcode() == ISD::UINT_TO_FP) &&
10542          "Need an int -> FP conversion node here");
10543 
10544   if (useSoftFloat() || !Subtarget.has64BitSupport())
10545     return SDValue();
10546 
10547   SelectionDAG &DAG = DCI.DAG;
10548   SDLoc dl(N);
10549   SDValue Op(N, 0);
10550 
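  // With P9 vector support, an i8/i16 load feeding the conversion can be
  // loaded directly into a VSX register (PPCISD::LXSIZX) and converted
  // there, so the value never has to go through a GPR.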
10551   SDValue FirstOperand(Op.getOperand(0));
10552   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
10553     (FirstOperand.getValueType() == MVT::i8 ||
10554      FirstOperand.getValueType() == MVT::i16);
10555   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
10556     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
10557     bool DstDouble = Op.getValueType() == MVT::f64;
10558     unsigned ConvOp = Signed ?
10559       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
10560       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
10561     SDValue WidthConst =
10562       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
10563                             dl, false);
10564     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
10565     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
10566     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
10567                                          DAG.getVTList(MVT::f64, MVT::Other),
10568                                          Ops, MVT::i8, LDN->getMemOperand());
10569 
10570     // For signed conversion, we need to sign-extend the value in the VSR
10571     if (Signed) {
10572       SDValue ExtOps[] = { Ld, WidthConst };
10573       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
10574       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
10575     } else
10576       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
10577   }
10578 
10579   // Don't handle ppc_fp128 here or i1 conversions.
10580   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
10581     return SDValue();
10582   if (Op.getOperand(0).getValueType() == MVT::i1)
10583     return SDValue();
10584 
10585   // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
10587   // scalar instructions, we have no method for zero- or sign-extending the
10588   // value. Thus, we cannot handle i32 intermediate values here.
10589   if (Op.getOperand(0).getValueType() == MVT::i32)
10590     return SDValue();
10591 
10592   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
10593          "UINT_TO_FP is supported only with FPCVT");
10594 
10595   // If we have FCFIDS, then use it when converting to single-precision.
10596   // Otherwise, convert to double-precision and then round.
10597   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
10598                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
10599                                                             : PPCISD::FCFIDS)
10600                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
10601                                                             : PPCISD::FCFID);
10602   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
10603                   ? MVT::f32
10604                   : MVT::f64;
10605 
  // If we're converting from a float to an int and back to a float again,
  // then we don't need the store/load pair at all.
10608   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
10609        Subtarget.hasFPCVT()) ||
10610       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
10611     SDValue Src = Op.getOperand(0).getOperand(0);
10612     if (Src.getValueType() == MVT::f32) {
10613       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
10614       DCI.AddToWorklist(Src.getNode());
10615     } else if (Src.getValueType() != MVT::f64) {
10616       // Make sure that we don't pick up a ppc_fp128 source value.
10617       return SDValue();
10618     }
10619 
10620     unsigned FCTOp =
10621       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
10622                                                         PPCISD::FCTIDUZ;
10623 
10624     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
10625     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
10626 
10627     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
10628       FP = DAG.getNode(ISD::FP_ROUND, dl,
10629                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
10630       DCI.AddToWorklist(FP.getNode());
10631     }
10632 
10633     return FP;
10634   }
10635 
10636   return SDValue();
10637 }
10638 
10639 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
10640 // builtins) into loads with swaps.
10641 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
10642                                               DAGCombinerInfo &DCI) const {
10643   SelectionDAG &DAG = DCI.DAG;
10644   SDLoc dl(N);
10645   SDValue Chain;
10646   SDValue Base;
10647   MachineMemOperand *MMO;
10648 
10649   switch (N->getOpcode()) {
10650   default:
10651     llvm_unreachable("Unexpected opcode for little endian VSX load");
10652   case ISD::LOAD: {
10653     LoadSDNode *LD = cast<LoadSDNode>(N);
10654     Chain = LD->getChain();
10655     Base = LD->getBasePtr();
10656     MMO = LD->getMemOperand();
10657     // If the MMO suggests this isn't a load of a full vector, leave
10658     // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem, that will be a bug.
10660     if (MMO->getSize() < 16)
10661       return SDValue();
10662     break;
10663   }
10664   case ISD::INTRINSIC_W_CHAIN: {
10665     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
10666     Chain = Intrin->getChain();
10667     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
10668     // us what we want. Get operand 2 instead.
10669     Base = Intrin->getOperand(2);
10670     MMO = Intrin->getMemOperand();
10671     break;
10672   }
10673   }
10674 
10675   MVT VecTy = N->getValueType(0).getSimpleVT();
10676   SDValue LoadOps[] = { Chain, Base };
10677   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
10678                                          DAG.getVTList(MVT::v2f64, MVT::Other),
10679                                          LoadOps, MVT::v2f64, MMO);
10680 
10681   DCI.AddToWorklist(Load.getNode());
10682   Chain = Load.getValue(1);
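  // lxvd2x loads the two doublewords in big-endian element order, so swap
  // them to get the expected little-endian element order in the register.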
10683   SDValue Swap = DAG.getNode(
10684       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
10685   DCI.AddToWorklist(Swap.getNode());
10686 
10687   // Add a bitcast if the resulting load type doesn't match v2f64.
10688   if (VecTy != MVT::v2f64) {
10689     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
10690     DCI.AddToWorklist(N.getNode());
10691     // Package {bitcast value, swap's chain} to match Load's shape.
10692     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
10693                        N, Swap.getValue(1));
10694   }
10695 
10696   return Swap;
10697 }
10698 
10699 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
10700 // builtins) into stores with swaps.
10701 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
10702                                                DAGCombinerInfo &DCI) const {
10703   SelectionDAG &DAG = DCI.DAG;
10704   SDLoc dl(N);
10705   SDValue Chain;
10706   SDValue Base;
10707   unsigned SrcOpnd;
10708   MachineMemOperand *MMO;
10709 
10710   switch (N->getOpcode()) {
10711   default:
10712     llvm_unreachable("Unexpected opcode for little endian VSX store");
10713   case ISD::STORE: {
10714     StoreSDNode *ST = cast<StoreSDNode>(N);
10715     Chain = ST->getChain();
10716     Base = ST->getBasePtr();
10717     MMO = ST->getMemOperand();
10718     SrcOpnd = 1;
10719     // If the MMO suggests this isn't a store of a full vector, leave
10720     // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem, that will be a bug.
10722     if (MMO->getSize() < 16)
10723       return SDValue();
10724     break;
10725   }
10726   case ISD::INTRINSIC_VOID: {
10727     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
10728     Chain = Intrin->getChain();
10729     // Intrin->getBasePtr() oddly does not get what we want.
10730     Base = Intrin->getOperand(3);
10731     MMO = Intrin->getMemOperand();
10732     SrcOpnd = 2;
10733     break;
10734   }
10735   }
10736 
10737   SDValue Src = N->getOperand(SrcOpnd);
10738   MVT VecTy = Src.getValueType().getSimpleVT();
10739 
  // All stores are done as v2f64, with a bitcast beforehand if needed.
10741   if (VecTy != MVT::v2f64) {
10742     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
10743     DCI.AddToWorklist(Src.getNode());
10744   }
10745 
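  // Swap the doublewords before the store so that stxvd2x writes them to
  // memory in the correct element order.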
10746   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
10747                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
10748   DCI.AddToWorklist(Swap.getNode());
10749   Chain = Swap.getValue(1);
10750   SDValue StoreOps[] = { Chain, Swap, Base };
10751   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
10752                                           DAG.getVTList(MVT::Other),
10753                                           StoreOps, VecTy, MMO);
10754   DCI.AddToWorklist(Store.getNode());
10755   return Store;
10756 }
10757 
10758 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
10759                                              DAGCombinerInfo &DCI) const {
10760   SelectionDAG &DAG = DCI.DAG;
10761   SDLoc dl(N);
10762   switch (N->getOpcode()) {
10763   default: break;
10764   case PPCISD::SHL:
10765     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
10766         return N->getOperand(0);
10767     break;
10768   case PPCISD::SRL:
10769     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
10770         return N->getOperand(0);
10771     break;
10772   case PPCISD::SRA:
10773     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
10774       if (C->isNullValue() ||   //  0 >>s V -> 0.
10775           C->isAllOnesValue())    // -1 >>s V -> -1.
10776         return N->getOperand(0);
10777     }
10778     break;
10779   case ISD::SIGN_EXTEND:
10780   case ISD::ZERO_EXTEND:
10781   case ISD::ANY_EXTEND:
10782     return DAGCombineExtBoolTrunc(N, DCI);
10783   case ISD::TRUNCATE:
10784   case ISD::SETCC:
10785   case ISD::SELECT_CC:
10786     return DAGCombineTruncBoolExt(N, DCI);
10787   case ISD::SINT_TO_FP:
10788   case ISD::UINT_TO_FP:
10789     return combineFPToIntToFP(N, DCI);
10790   case ISD::STORE: {
10791     EVT Op1VT = N->getOperand(1).getValueType();
10792     bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
10793       (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
10794 
10795     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
10796     if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
10797         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
10798         ValidTypeForStoreFltAsInt &&
10799         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
10800       SDValue Val = N->getOperand(1).getOperand(0);
10801       if (Val.getValueType() == MVT::f32) {
10802         Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
10803         DCI.AddToWorklist(Val.getNode());
10804       }
10805       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
10806       DCI.AddToWorklist(Val.getNode());
10807 
10808       if (Op1VT == MVT::i32) {
10809         SDValue Ops[] = {
10810           N->getOperand(0), Val, N->getOperand(2),
10811           DAG.getValueType(N->getOperand(1).getValueType())
10812         };
10813 
10814         Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
10815                 DAG.getVTList(MVT::Other), Ops,
10816                 cast<StoreSDNode>(N)->getMemoryVT(),
10817                 cast<StoreSDNode>(N)->getMemOperand());
10818       } else {
10819         unsigned WidthInBytes =
10820           N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
10821         SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
10822 
10823         SDValue Ops[] = {
10824           N->getOperand(0), Val, N->getOperand(2), WidthConst,
10825           DAG.getValueType(N->getOperand(1).getValueType())
10826         };
10827         Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
10828                                       DAG.getVTList(MVT::Other), Ops,
10829                                       cast<StoreSDNode>(N)->getMemoryVT(),
10830                                       cast<StoreSDNode>(N)->getMemOperand());
10831       }
10832 
10833       DCI.AddToWorklist(Val.getNode());
10834       return Val;
10835     }
10836 
10837     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
10838     if (cast<StoreSDNode>(N)->isUnindexed() &&
10839         N->getOperand(1).getOpcode() == ISD::BSWAP &&
10840         N->getOperand(1).getNode()->hasOneUse() &&
10841         (N->getOperand(1).getValueType() == MVT::i32 ||
10842          N->getOperand(1).getValueType() == MVT::i16 ||
10843          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
10844           N->getOperand(1).getValueType() == MVT::i64))) {
10845       SDValue BSwapOp = N->getOperand(1).getOperand(0);
10846       // Do an any-extend to 32-bits if this is a half-word input.
10847       if (BSwapOp.getValueType() == MVT::i16)
10848         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
10849 
10850       SDValue Ops[] = {
10851         N->getOperand(0), BSwapOp, N->getOperand(2),
10852         DAG.getValueType(N->getOperand(1).getValueType())
10853       };
10854       return
10855         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
10856                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
10857                                 cast<StoreSDNode>(N)->getMemOperand());
10858     }
10859 
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
10861     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
10862     EVT VT = N->getOperand(1).getValueType();
10863     if (VT.isSimple()) {
10864       MVT StoreVT = VT.getSimpleVT();
10865       if (Subtarget.needsSwapsForVSXMemOps() &&
10866           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
10867            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
10868         return expandVSXStoreForLE(N, DCI);
10869     }
10870     break;
10871   }
10872   case ISD::LOAD: {
10873     LoadSDNode *LD = cast<LoadSDNode>(N);
10874     EVT VT = LD->getValueType(0);
10875 
10876     // For little endian, VSX loads require generating lxvd2x/xxswapd.
10877     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
10878     if (VT.isSimple()) {
10879       MVT LoadVT = VT.getSimpleVT();
10880       if (Subtarget.needsSwapsForVSXMemOps() &&
10881           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
10882            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
10883         return expandVSXLoadForLE(N, DCI);
10884     }
10885 
10886     // We sometimes end up with a 64-bit integer load, from which we extract
10887     // two single-precision floating-point numbers. This happens with
10888     // std::complex<float>, and other similar structures, because of the way we
10889     // canonicalize structure copies. However, if we lack direct moves,
10890     // then the final bitcasts from the extracted integer values to the
10891     // floating-point numbers turn into store/load pairs. Even with direct moves,
10892     // just loading the two floating-point numbers is likely better.
10893     auto ReplaceTwoFloatLoad = [&]() {
10894       if (VT != MVT::i64)
10895         return false;
10896 
10897       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
10898           LD->isVolatile())
10899         return false;
10900 
10901       //  We're looking for a sequence like this:
10902       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
10903       //      t16: i64 = srl t13, Constant:i32<32>
10904       //    t17: i32 = truncate t16
10905       //  t18: f32 = bitcast t17
10906       //    t19: i32 = truncate t13
10907       //  t20: f32 = bitcast t19
10908 
10909       if (!LD->hasNUsesOfValue(2, 0))
10910         return false;
10911 
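      // Find the two users of the load's value result (result 0), skipping
      // over any users of the chain result.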
10912       auto UI = LD->use_begin();
10913       while (UI.getUse().getResNo() != 0) ++UI;
10914       SDNode *Trunc = *UI++;
10915       while (UI.getUse().getResNo() != 0) ++UI;
10916       SDNode *RightShift = *UI;
10917       if (Trunc->getOpcode() != ISD::TRUNCATE)
10918         std::swap(Trunc, RightShift);
10919 
10920       if (Trunc->getOpcode() != ISD::TRUNCATE ||
10921           Trunc->getValueType(0) != MVT::i32 ||
10922           !Trunc->hasOneUse())
10923         return false;
10924       if (RightShift->getOpcode() != ISD::SRL ||
10925           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
10926           RightShift->getConstantOperandVal(1) != 32 ||
10927           !RightShift->hasOneUse())
10928         return false;
10929 
10930       SDNode *Trunc2 = *RightShift->use_begin();
10931       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
10932           Trunc2->getValueType(0) != MVT::i32 ||
10933           !Trunc2->hasOneUse())
10934         return false;
10935 
10936       SDNode *Bitcast = *Trunc->use_begin();
10937       SDNode *Bitcast2 = *Trunc2->use_begin();
10938 
10939       if (Bitcast->getOpcode() != ISD::BITCAST ||
10940           Bitcast->getValueType(0) != MVT::f32)
10941         return false;
10942       if (Bitcast2->getOpcode() != ISD::BITCAST ||
10943           Bitcast2->getValueType(0) != MVT::f32)
10944         return false;
10945 
10946       if (Subtarget.isLittleEndian())
10947         std::swap(Bitcast, Bitcast2);
10948 
10949       // Bitcast has the second float (in memory-layout order) and Bitcast2
10950       // has the first one.
10951 
10952       SDValue BasePtr = LD->getBasePtr();
10953       if (LD->isIndexed()) {
10954         assert(LD->getAddressingMode() == ISD::PRE_INC &&
10955                "Non-pre-inc AM on PPC?");
10956         BasePtr =
10957           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10958                       LD->getOffset());
10959       }
10960 
10961       auto MMOFlags =
10962           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
10963       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
10964                                       LD->getPointerInfo(), LD->getAlignment(),
10965                                       MMOFlags, LD->getAAInfo());
10966       SDValue AddPtr =
10967         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
10968                     BasePtr, DAG.getIntPtrConstant(4, dl));
10969       SDValue FloatLoad2 = DAG.getLoad(
10970           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
10971           LD->getPointerInfo().getWithOffset(4),
10972           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
10973 
10974       if (LD->isIndexed()) {
10975         // Note that DAGCombine should re-form any pre-increment load(s) from
10976         // what is produced here if that makes sense.
10977         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
10978       }
10979 
10980       DCI.CombineTo(Bitcast2, FloatLoad);
10981       DCI.CombineTo(Bitcast, FloatLoad2);
10982 
10983       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
10984                                     SDValue(FloatLoad2.getNode(), 1));
10985       return true;
10986     };
10987 
10988     if (ReplaceTwoFloatLoad())
10989       return SDValue(N, 0);
10990 
10991     EVT MemVT = LD->getMemoryVT();
10992     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
10993     unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
10994     Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
10995     unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
10996     if (LD->isUnindexed() && VT.isVector() &&
10997         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
10998           // P8 and later hardware should just use LOAD.
10999           !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
11000                                        VT == MVT::v4i32 || VT == MVT::v4f32)) ||
11001          (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
11002           LD->getAlignment() >= ScalarABIAlignment)) &&
11003         LD->getAlignment() < ABIAlignment) {
11004       // This is a type-legal unaligned Altivec or QPX load.
11005       SDValue Chain = LD->getChain();
11006       SDValue Ptr = LD->getBasePtr();
11007       bool isLittleEndian = Subtarget.isLittleEndian();
11008 
11009       // This implements the loading of unaligned vectors as described in
11010       // the venerable Apple Velocity Engine overview. Specifically:
11011       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
11012       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
11013       //
11014       // The general idea is to expand a sequence of one or more unaligned
11015       // loads into an alignment-based permutation-control instruction (lvsl
11016       // or lvsr), a series of regular vector loads (which always truncate
11017       // their input address to an aligned address), and a series of
11018       // permutations.  The results of these permutations are the requested
11019       // loaded values.  The trick is that the last "extra" load is not taken
11020       // from the address you might suspect (sizeof(vector) bytes after the
11021       // last requested load), but rather sizeof(vector) - 1 bytes after the
11022       // last requested vector. The point of this is to avoid a page fault if
11023       // the base address happened to be aligned. This works because if the
11024       // base address is aligned, then adding less than a full vector length
11025       // will cause the last vector in the sequence to be (re)loaded.
11026       // Otherwise, the next vector will be fetched as you might suspect was
11027       // necessary.
11028 
11029       // We might be able to reuse the permutation generation from
11030       // a different base address offset from this one by an aligned amount.
11031       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
11032       // optimization later.
11033       Intrinsic::ID Intr, IntrLD, IntrPerm;
11034       MVT PermCntlTy, PermTy, LDTy;
11035       if (Subtarget.hasAltivec()) {
11036         Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
11037                                  Intrinsic::ppc_altivec_lvsl;
11038         IntrLD = Intrinsic::ppc_altivec_lvx;
11039         IntrPerm = Intrinsic::ppc_altivec_vperm;
11040         PermCntlTy = MVT::v16i8;
11041         PermTy = MVT::v4i32;
11042         LDTy = MVT::v4i32;
11043       } else {
11044         Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
11045                                        Intrinsic::ppc_qpx_qvlpcls;
11046         IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
11047                                        Intrinsic::ppc_qpx_qvlfs;
11048         IntrPerm = Intrinsic::ppc_qpx_qvfperm;
11049         PermCntlTy = MVT::v4f64;
11050         PermTy = MVT::v4f64;
11051         LDTy = MemVT.getSimpleVT();
11052       }
11053 
11054       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
11055 
11056       // Create the new MMO for the new base load. It is like the original MMO,
11057       // but represents an area in memory almost twice the vector size centered
11058       // on the original address. If the address is unaligned, we might start
11059       // reading up to (sizeof(vector)-1) bytes below the address of the
11060       // original unaligned load.
11061       MachineFunction &MF = DAG.getMachineFunction();
11062       MachineMemOperand *BaseMMO =
11063         MF.getMachineMemOperand(LD->getMemOperand(),
11064                                 -(long)MemVT.getStoreSize()+1,
11065                                 2*MemVT.getStoreSize()-1);
11066 
11067       // Create the new base load.
11068       SDValue LDXIntID =
11069           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
11070       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
11071       SDValue BaseLoad =
11072         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11073                                 DAG.getVTList(PermTy, MVT::Other),
11074                                 BaseLoadOps, LDTy, BaseMMO);
11075 
11076       // Note that the value of IncOffset (which is provided to the next
11077       // load's pointer info offset value, and thus used to calculate the
11078       // alignment), and the value of IncValue (which is actually used to
11079       // increment the pointer value) are different! This is because we
11080       // require the next load to appear to be aligned, even though it
11081       // is actually offset from the base pointer by a lesser amount.
11082       int IncOffset = VT.getSizeInBits() / 8;
11083       int IncValue = IncOffset;
11084 
11085       // Walk (both up and down) the chain looking for another load at the real
11086       // (aligned) offset (the alignment of the other load does not matter in
11087       // this case). If found, then do not use the offset reduction trick, as
11088       // that will prevent the loads from being later combined (as they would
11089       // otherwise be duplicates).
11090       if (!findConsecutiveLoad(LD, DAG))
11091         --IncValue;
11092 
11093       SDValue Increment =
11094           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
11095       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
11096 
11097       MachineMemOperand *ExtraMMO =
11098         MF.getMachineMemOperand(LD->getMemOperand(),
11099                                 1, 2*MemVT.getStoreSize()-1);
11100       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
11101       SDValue ExtraLoad =
11102         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11103                                 DAG.getVTList(PermTy, MVT::Other),
11104                                 ExtraLoadOps, LDTy, ExtraMMO);
11105 
11106       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
11107         BaseLoad.getValue(1), ExtraLoad.getValue(1));
11108 
11109       // Because vperm has a big-endian bias, we must reverse the order
11110       // of the input vectors and complement the permute control vector
11111       // when generating little endian code.  We have already handled the
11112       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
11113       // and ExtraLoad here.
11114       SDValue Perm;
11115       if (isLittleEndian)
11116         Perm = BuildIntrinsicOp(IntrPerm,
11117                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
11118       else
11119         Perm = BuildIntrinsicOp(IntrPerm,
11120                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
11121 
11122       if (VT != PermTy)
11123         Perm = Subtarget.hasAltivec() ?
11124                  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
11125                  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
11126                                DAG.getTargetConstant(1, dl, MVT::i64));
11127                                // second argument is 1 because this rounding
11128                                // is always exact.
11129 
11130       // The output of the permutation is our loaded result, the TokenFactor is
11131       // our new chain.
11132       DCI.CombineTo(N, Perm, TF);
11133       return SDValue(N, 0);
11134     }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    bool isLittleEndian = Subtarget.isLittleEndian();
    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                                         : Intrinsic::ppc_altivec_lvsl);
    if ((IID == Intr ||
         IID == Intrinsic::ppc_qpx_qvlpcld  ||
         IID == Intrinsic::ppc_qpx_qvlpcls) &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
                 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;

      if (DAG.MaskedValueIsZero(Add->getOperand(1),
                                APInt::getAllOnesValue(Bits /* alignment */)
                                    .zext(Add.getScalarValueSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
                                  UE = BasePtr->use_end();
             UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.

            return SDValue(*UI, 0);
          }
        }
      }

      if (isa<ConstantSDNode>(Add->getOperand(1))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::ADD &&
              isa<ConstantSDNode>(UI->getOperand(1)) &&
              (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
               cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
              (1ULL << Bits) == 0) {
            SDNode *OtherAdd = *UI;
            for (SDNode::use_iterator VI = OtherAdd->use_begin(),
                 VE = OtherAdd->use_end(); VI != VE; ++VI) {
              if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
                  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
                return SDValue(*VI, 0);
              }
            }
          }
        }
      }
    }
    break;
  }
11193   case ISD::INTRINSIC_W_CHAIN: {
11194     // For little endian, VSX loads require generating lxvd2x/xxswapd.
11195     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
11196     if (Subtarget.needsSwapsForVSXMemOps()) {
11197       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11198       default:
11199         break;
11200       case Intrinsic::ppc_vsx_lxvw4x:
11201       case Intrinsic::ppc_vsx_lxvd2x:
11202         return expandVSXLoadForLE(N, DCI);
11203       }
11204     }
11205     break;
11206   }
11207   case ISD::INTRINSIC_VOID: {
11208     // For little endian, VSX stores require generating xxswapd/stxvd2x.
11209     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
11210     if (Subtarget.needsSwapsForVSXMemOps()) {
11211       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11212       default:
11213         break;
11214       case Intrinsic::ppc_vsx_stxvw4x:
11215       case Intrinsic::ppc_vsx_stxvd2x:
11216         return expandVSXStoreForLE(N, DCI);
11217       }
11218     }
11219     break;
11220   }
11221   case ISD::BSWAP:
11222     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
11223     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
11224         N->getOperand(0).hasOneUse() &&
11225         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
11226          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
11227           N->getValueType(0) == MVT::i64))) {
11228       SDValue Load = N->getOperand(0);
11229       LoadSDNode *LD = cast<LoadSDNode>(Load);
11230       // Create the byte-swapping load.
11231       SDValue Ops[] = {
11232         LD->getChain(),    // Chain
11233         LD->getBasePtr(),  // Ptr
11234         DAG.getValueType(N->getValueType(0)) // VT
11235       };
11236       SDValue BSLoad =
11237         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
11238                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
11239                                               MVT::i64 : MVT::i32, MVT::Other),
11240                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
11241 
11242       // If this is an i16 load, insert the truncate.
11243       SDValue ResVal = BSLoad;
11244       if (N->getValueType(0) == MVT::i16)
11245         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
11246 
11247       // First, combine the bswap away.  This makes the value produced by the
11248       // load dead.
11249       DCI.CombineTo(N, ResVal);
11250 
11251       // Next, combine the load away, we give it a bogus result value but a real
11252       // chain result.  The result value is dead because the bswap is dead.
11253       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
11254 
11255       // Return N so it doesn't get rechecked!
11256       return SDValue(N, 0);
11257     }
11258 
11259     break;
11260   case PPCISD::VCMP: {
11261     // If a VCMPo node already exists with exactly the same operands as this
11262     // node, use its result instead of this node (VCMPo computes both a CR6 and
11263     // a normal output).
11264     //
11265     if (!N->getOperand(0).hasOneUse() &&
11266         !N->getOperand(1).hasOneUse() &&
11267         !N->getOperand(2).hasOneUse()) {
11268 
11269       // Scan all of the users of the LHS, looking for VCMPo's that match.
11270       SDNode *VCMPoNode = nullptr;
11271 
11272       SDNode *LHSN = N->getOperand(0).getNode();
11273       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
11274            UI != E; ++UI)
11275         if (UI->getOpcode() == PPCISD::VCMPo &&
11276             UI->getOperand(1) == N->getOperand(1) &&
11277             UI->getOperand(2) == N->getOperand(2) &&
11278             UI->getOperand(0) == N->getOperand(0)) {
11279           VCMPoNode = *UI;
11280           break;
11281         }
11282 
      // If there is no VCMPo node, or if its flag result (value #1) is
      // unused, don't transform this.
11285       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
11286         break;
11287 
11288       // Look at the (necessarily single) use of the flag value.  If it has a
11289       // chain, this transformation is more complex.  Note that multiple things
11290       // could use the value result, which we should ignore.
11291       SDNode *FlagUser = nullptr;
11292       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
11293            FlagUser == nullptr; ++UI) {
11294         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
11295         SDNode *User = *UI;
11296         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
11297           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
11298             FlagUser = User;
11299             break;
11300           }
11301         }
11302       }
11303 
      // If the user is an MFOCRF instruction, we know this is safe.
      // Otherwise we give up for now.
11306       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
11307         return SDValue(VCMPoNode, 0);
11308     }
11309     break;
11310   }
11311   case ISD::BRCOND: {
11312     SDValue Cond = N->getOperand(1);
11313     SDValue Target = N->getOperand(2);
11314 
11315     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11316         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
11317           Intrinsic::ppc_is_decremented_ctr_nonzero) {
11318 
11319       // We now need to make the intrinsic dead (it cannot be instruction
11320       // selected).
11321       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
11322       assert(Cond.getNode()->hasOneUse() &&
11323              "Counter decrement has more than one use");
11324 
11325       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
11326                          N->getOperand(0), Target);
11327     }
11328   }
11329   break;
11330   case ISD::BR_CC: {
11331     // If this is a branch on an altivec predicate comparison, lower this so
11332     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
11333     // lowering is done pre-legalize, because the legalizer lowers the predicate
11334     // compare down to code that is difficult to reassemble.
11335     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
11336     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
11337 
11338     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
11339     // value. If so, pass-through the AND to get to the intrinsic.
11340     if (LHS.getOpcode() == ISD::AND &&
11341         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11342         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
11343           Intrinsic::ppc_is_decremented_ctr_nonzero &&
11344         isa<ConstantSDNode>(LHS.getOperand(1)) &&
11345         !isNullConstant(LHS.getOperand(1)))
11346       LHS = LHS.getOperand(0);
11347 
11348     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11349         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
11350           Intrinsic::ppc_is_decremented_ctr_nonzero &&
11351         isa<ConstantSDNode>(RHS)) {
11352       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
11353              "Counter decrement comparison is not EQ or NE");
11354 
11355       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
11356       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
11357                     (CC == ISD::SETNE && !Val);
11358 
11359       // We now need to make the intrinsic dead (it cannot be instruction
11360       // selected).
11361       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
11362       assert(LHS.getNode()->hasOneUse() &&
11363              "Counter decrement has more than one use");
11364 
11365       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
11366                          N->getOperand(0), N->getOperand(4));
11367     }
11368 
11369     int CompareOpc;
11370     bool isDot;
11371 
11372     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
11373         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
11374         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
11375       assert(isDot && "Can't compare against a vector result!");
11376 
11377       // If this is a comparison against something other than 0/1, then we know
11378       // that the condition is never/always true.
11379       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
11380       if (Val != 0 && Val != 1) {
11381         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
11382           return N->getOperand(0);
11383         // Always !=, turn it into an unconditional branch.
11384         return DAG.getNode(ISD::BR, dl, MVT::Other,
11385                            N->getOperand(0), N->getOperand(4));
11386       }
11387 
11388       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
11389 
11390       // Create the PPCISD altivec 'dot' comparison node.
11391       SDValue Ops[] = {
11392         LHS.getOperand(2),  // LHS of compare
11393         LHS.getOperand(3),  // RHS of compare
11394         DAG.getConstant(CompareOpc, dl, MVT::i32)
11395       };
11396       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
11397       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
11398 
11399       // Unpack the result based on how the target uses it.
11400       PPC::Predicate CompOpc;
11401       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
11402       default:  // Can't happen, don't crash on invalid number though.
11403       case 0:   // Branch on the value of the EQ bit of CR6.
11404         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
11405         break;
11406       case 1:   // Branch on the inverted value of the EQ bit of CR6.
11407         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
11408         break;
11409       case 2:   // Branch on the value of the LT bit of CR6.
11410         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
11411         break;
11412       case 3:   // Branch on the inverted value of the LT bit of CR6.
11413         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
11414         break;
11415       }
11416 
11417       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
11418                          DAG.getConstant(CompOpc, dl, MVT::i32),
11419                          DAG.getRegister(PPC::CR6, MVT::i32),
11420                          N->getOperand(4), CompNode.getValue(1));
11421     }
11422     break;
11423   }
11424   case ISD::BUILD_VECTOR:
11425     return DAGCombineBuildVector(N, DCI);
11426   }
11427 
11428   return SDValue();
11429 }
11430 
11431 SDValue
11432 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
11433                                   SelectionDAG &DAG,
11434                                   std::vector<SDNode *> *Created) const {
11435   // fold (sdiv X, pow2)
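  // For example, (sdiv X, 4) becomes (SRA_ADDZE X, 2): an algebraic shift
  // right whose carry is added back with addze so that negative dividends
  // round toward zero. Negative powers of two additionally negate the
  // result below.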
11436   EVT VT = N->getValueType(0);
11437   if (VT == MVT::i64 && !Subtarget.isPPC64())
11438     return SDValue();
11439   if ((VT != MVT::i32 && VT != MVT::i64) ||
11440       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
11441     return SDValue();
11442 
11443   SDLoc DL(N);
11444   SDValue N0 = N->getOperand(0);
11445 
11446   bool IsNegPow2 = (-Divisor).isPowerOf2();
11447   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
11448   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
11449 
11450   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
11451   if (Created)
11452     Created->push_back(Op.getNode());
11453 
11454   if (IsNegPow2) {
11455     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
11456     if (Created)
11457       Created->push_back(Op.getNode());
11458   }
11459 
11460   return Op;
11461 }
11462 
11463 //===----------------------------------------------------------------------===//
11464 // Inline Assembly Support
11465 //===----------------------------------------------------------------------===//
11466 
11467 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
11468                                                       APInt &KnownZero,
11469                                                       APInt &KnownOne,
11470                                                       const SelectionDAG &DAG,
11471                                                       unsigned Depth) const {
11472   KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
11473   switch (Op.getOpcode()) {
11474   default: break;
11475   case PPCISD::LBRX: {
11476     // lhbrx is known to have the top bits cleared out.
11477     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
11478       KnownZero = 0xFFFF0000;
11479     break;
11480   }
11481   case ISD::INTRINSIC_WO_CHAIN: {
11482     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
11483     default: break;
11484     case Intrinsic::ppc_altivec_vcmpbfp_p:
11485     case Intrinsic::ppc_altivec_vcmpeqfp_p:
11486     case Intrinsic::ppc_altivec_vcmpequb_p:
11487     case Intrinsic::ppc_altivec_vcmpequh_p:
11488     case Intrinsic::ppc_altivec_vcmpequw_p:
11489     case Intrinsic::ppc_altivec_vcmpequd_p:
11490     case Intrinsic::ppc_altivec_vcmpgefp_p:
11491     case Intrinsic::ppc_altivec_vcmpgtfp_p:
11492     case Intrinsic::ppc_altivec_vcmpgtsb_p:
11493     case Intrinsic::ppc_altivec_vcmpgtsh_p:
11494     case Intrinsic::ppc_altivec_vcmpgtsw_p:
11495     case Intrinsic::ppc_altivec_vcmpgtsd_p:
11496     case Intrinsic::ppc_altivec_vcmpgtub_p:
11497     case Intrinsic::ppc_altivec_vcmpgtuh_p:
11498     case Intrinsic::ppc_altivec_vcmpgtuw_p:
11499     case Intrinsic::ppc_altivec_vcmpgtud_p:
11500       KnownZero = ~1U;  // All bits but the low one are known to be zero.
11501       break;
11502     }
11503   }
11504   }
11505 }
11506 
11507 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
11508   switch (Subtarget.getDarwinDirective()) {
11509   default: break;
11510   case PPC::DIR_970:
11511   case PPC::DIR_PWR4:
11512   case PPC::DIR_PWR5:
11513   case PPC::DIR_PWR5X:
11514   case PPC::DIR_PWR6:
11515   case PPC::DIR_PWR6X:
11516   case PPC::DIR_PWR7:
11517   case PPC::DIR_PWR8:
11518   case PPC::DIR_PWR9: {
11519     if (!ML)
11520       break;
11521 
11522     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11523 
11524     // For small loops (between 5 and 8 instructions), align to a 32-byte
11525     // boundary so that the entire loop fits in one instruction-cache line.
11526     uint64_t LoopSize = 0;
11527     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
11528       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
11529         LoopSize += TII->getInstSizeInBytes(*J);
11530         if (LoopSize > 32)
11531           break;
11532       }
11533 
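    // Note that the alignment returned here is a log2 value, so 5 requests a
    // 2^5 = 32-byte boundary.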
11534     if (LoopSize > 16 && LoopSize <= 32)
11535       return 5;
11536 
11537     break;
11538   }
11539   }
11540 
11541   return TargetLowering::getPrefLoopAlignment(ML);
11542 }
11543 
11544 /// getConstraintType - Given a constraint, return the type of
11545 /// constraint it is for this target.
11546 PPCTargetLowering::ConstraintType
11547 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
11548   if (Constraint.size() == 1) {
11549     switch (Constraint[0]) {
11550     default: break;
11551     case 'b':
11552     case 'r':
11553     case 'f':
11554     case 'd':
11555     case 'v':
11556     case 'y':
11557       return C_RegisterClass;
11558     case 'Z':
11559       // FIXME: While Z does indicate a memory constraint, it specifically
11560       // indicates an r+r address (used in conjunction with the 'y' modifier
11561       // in the replacement string). Currently, we're forcing the base
11562       // register to be r0 in the asm printer (which is interpreted as zero)
11563       // and forming the complete address in the second register. This is
11564       // suboptimal.
11565       return C_Memory;
11566     }
11567   } else if (Constraint == "wc") { // individual CR bits.
11568     return C_RegisterClass;
11569   } else if (Constraint == "wa" || Constraint == "wd" ||
11570              Constraint == "wf" || Constraint == "ws") {
11571     return C_RegisterClass; // VSX registers.
11572   }
11573   return TargetLowering::getConstraintType(Constraint);
11574 }
11575 
11576 /// Examine constraint type and operand type and determine a weight value.
11577 /// This object must already have been set up with the operand type
11578 /// and the current alternative constraint selected.
11579 TargetLowering::ConstraintWeight
11580 PPCTargetLowering::getSingleConstraintMatchWeight(
11581     AsmOperandInfo &info, const char *constraint) const {
11582   ConstraintWeight weight = CW_Invalid;
11583   Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
11586   if (!CallOperandVal)
11587     return CW_Default;
11588   Type *type = CallOperandVal->getType();
11589 
11590   // Look at the constraint type.
11591   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
11592     return CW_Register; // an individual CR bit.
11593   else if ((StringRef(constraint) == "wa" ||
11594             StringRef(constraint) == "wd" ||
11595             StringRef(constraint) == "wf") &&
11596            type->isVectorTy())
11597     return CW_Register;
11598   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
11599     return CW_Register;
11600 
11601   switch (*constraint) {
11602   default:
11603     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
11604     break;
11605   case 'b':
11606     if (type->isIntegerTy())
11607       weight = CW_Register;
11608     break;
11609   case 'f':
11610     if (type->isFloatTy())
11611       weight = CW_Register;
11612     break;
11613   case 'd':
11614     if (type->isDoubleTy())
11615       weight = CW_Register;
11616     break;
11617   case 'v':
11618     if (type->isVectorTy())
11619       weight = CW_Register;
11620     break;
11621   case 'y':
11622     weight = CW_Register;
11623     break;
11624   case 'Z':
11625     weight = CW_Memory;
11626     break;
11627   }
11628   return weight;
11629 }
11630 
11631 std::pair<unsigned, const TargetRegisterClass *>
11632 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
11633                                                 StringRef Constraint,
11634                                                 MVT VT) const {
11635   if (Constraint.size() == 1) {
11636     // GCC RS6000 Constraint Letters
11637     switch (Constraint[0]) {
11638     case 'b':   // R1-R31
11639       if (VT == MVT::i64 && Subtarget.isPPC64())
11640         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
11641       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
11642     case 'r':   // R0-R31
11643       if (VT == MVT::i64 && Subtarget.isPPC64())
11644         return std::make_pair(0U, &PPC::G8RCRegClass);
11645       return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // care about the distinction here, so give them all the same reg classes.
11649     case 'd':
11650     case 'f':
11651       if (VT == MVT::f32 || VT == MVT::i32)
11652         return std::make_pair(0U, &PPC::F4RCRegClass);
11653       if (VT == MVT::f64 || VT == MVT::i64)
11654         return std::make_pair(0U, &PPC::F8RCRegClass);
11655       if (VT == MVT::v4f64 && Subtarget.hasQPX())
11656         return std::make_pair(0U, &PPC::QFRCRegClass);
11657       if (VT == MVT::v4f32 && Subtarget.hasQPX())
11658         return std::make_pair(0U, &PPC::QSRCRegClass);
11659       break;
11660     case 'v':
11661       if (VT == MVT::v4f64 && Subtarget.hasQPX())
11662         return std::make_pair(0U, &PPC::QFRCRegClass);
11663       if (VT == MVT::v4f32 && Subtarget.hasQPX())
11664         return std::make_pair(0U, &PPC::QSRCRegClass);
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break; // Without AltiVec, don't fall through to the 'y' (CR) case.
11667     case 'y':   // crrc
11668       return std::make_pair(0U, &PPC::CRRCRegClass);
11669     }
11670   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
11671     // An individual CR bit.
11672     return std::make_pair(0U, &PPC::CRBITRCRegClass);
11673   } else if ((Constraint == "wa" || Constraint == "wd" ||
11674              Constraint == "wf") && Subtarget.hasVSX()) {
11675     return std::make_pair(0U, &PPC::VSRCRegClass);
11676   } else if (Constraint == "ws" && Subtarget.hasVSX()) {
11677     if (VT == MVT::f32 && Subtarget.hasP8Vector())
11678       return std::make_pair(0U, &PPC::VSSRCRegClass);
11679     else
11680       return std::make_pair(0U, &PPC::VSFRCRegClass);
11681   }
11682 
11683   std::pair<unsigned, const TargetRegisterClass *> R =
11684       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
11685 
11686   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
11687   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
11688   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
11689   // register.
11690   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
11691   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
11692   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
11693       PPC::GPRCRegClass.contains(R.first))
11694     return std::make_pair(TRI->getMatchingSuperReg(R.first,
11695                             PPC::sub_32, &PPC::G8RCRegClass),
11696                           &PPC::G8RCRegClass);
11697 
11698   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
11699   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
11700     R.first = PPC::CR0;
11701     R.second = &PPC::CRRCRegClass;
11702   }
11703 
11704   return R;
11705 }
11706 
11707 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
11708 /// vector.  If it is invalid, don't add anything to Ops.
11709 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
11710                                                      std::string &Constraint,
11711                                                      std::vector<SDValue>&Ops,
11712                                                      SelectionDAG &DAG) const {
11713   SDValue Result;
11714 
11715   // Only support length 1 constraints.
11716   if (Constraint.length() > 1) return;
11717 
11718   char Letter = Constraint[0];
11719   switch (Letter) {
11720   default: break;
11721   case 'I':
11722   case 'J':
11723   case 'K':
11724   case 'L':
11725   case 'M':
11726   case 'N':
11727   case 'O':
11728   case 'P': {
11729     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
11730     if (!CST) return; // Must be an immediate to match.
11731     SDLoc dl(Op);
11732     int64_t Value = CST->getSExtValue();
11733     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
11734                          // numbers are printed as such.
11735     switch (Letter) {
11736     default: llvm_unreachable("Unknown constraint letter!");
11737     case 'I':  // "I" is a signed 16-bit constant.
11738       if (isInt<16>(Value))
11739         Result = DAG.getTargetConstant(Value, dl, TCVT);
11740       break;
11741     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
11742       if (isShiftedUInt<16, 16>(Value))
11743         Result = DAG.getTargetConstant(Value, dl, TCVT);
11744       break;
11745     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
11746       if (isShiftedInt<16, 16>(Value))
11747         Result = DAG.getTargetConstant(Value, dl, TCVT);
11748       break;
11749     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
11750       if (isUInt<16>(Value))
11751         Result = DAG.getTargetConstant(Value, dl, TCVT);
11752       break;
11753     case 'M':  // "M" is a constant that is greater than 31.
11754       if (Value > 31)
11755         Result = DAG.getTargetConstant(Value, dl, TCVT);
11756       break;
11757     case 'N':  // "N" is a positive constant that is an exact power of two.
11758       if (Value > 0 && isPowerOf2_64(Value))
11759         Result = DAG.getTargetConstant(Value, dl, TCVT);
11760       break;
11761     case 'O':  // "O" is the constant zero.
11762       if (Value == 0)
11763         Result = DAG.getTargetConstant(Value, dl, TCVT);
11764       break;
11765     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
11766       if (isInt<16>(-Value))
11767         Result = DAG.getTargetConstant(Value, dl, TCVT);
11768       break;
11769     }
11770     break;
11771   }
11772   }
11773 
11774   if (Result.getNode()) {
11775     Ops.push_back(Result);
11776     return;
11777   }
11778 
11779   // Handle standard constraint letters.
11780   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
11781 }
11782 
11783 // isLegalAddressingMode - Return true if the addressing mode represented
11784 // by AM is legal for this target, for a load/store of the specified type.
11785 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
11786                                               const AddrMode &AM, Type *Ty,
11787                                               unsigned AS) const {
11788   // PPC does not allow r+i addressing modes for vectors!
11789   if (Ty->isVectorTy() && AM.BaseOffs != 0)
11790     return false;
11791 
11792   // PPC allows a sign-extended 16-bit immediate field.
11793   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
11794     return false;
11795 
11796   // No global is ever allowed as a base.
11797   if (AM.BaseGV)
11798     return false;
11799 
  // PPC only supports r+r addressing modes.
11801   switch (AM.Scale) {
11802   case 0:  // "r+i" or just "i", depending on HasBaseReg.
11803     break;
11804   case 1:
11805     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
11806       return false;
11807     // Otherwise we have r+r or r+i.
11808     break;
11809   case 2:
11810     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
11811       return false;
11812     // Allow 2*r as r+r.
11813     break;
11814   default:
11815     // No other scales are supported.
11816     return false;
11817   }
11818 
11819   return true;
11820 }
11821 
11822 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
11823                                            SelectionDAG &DAG) const {
11824   MachineFunction &MF = DAG.getMachineFunction();
11825   MachineFrameInfo &MFI = MF.getFrameInfo();
11826   MFI.setReturnAddressIsTaken(true);
11827 
11828   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
11829     return SDValue();
11830 
11831   SDLoc dl(Op);
11832   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
11833 
11834   // Make sure the function does not optimize away the store of the RA to
11835   // the stack.
11836   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
11837   FuncInfo->setLRStoreRequired();
11838   bool isPPC64 = Subtarget.isPPC64();
11839   auto PtrVT = getPointerTy(MF.getDataLayout());
11840 
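  // For a non-zero depth, walk up to the requested frame and load the saved
  // LR from the ABI-defined return-address save slot in that frame.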
11841   if (Depth > 0) {
11842     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
11843     SDValue Offset =
11844         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
11845                         isPPC64 ? MVT::i64 : MVT::i32);
11846     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
11847                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
11848                        MachinePointerInfo());
11849   }
11850 
11851   // Just load the return address off the stack.
11852   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
11853   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
11854                      MachinePointerInfo());
11855 }
11856 
11857 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
11858                                           SelectionDAG &DAG) const {
11859   SDLoc dl(Op);
11860   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
11861 
11862   MachineFunction &MF = DAG.getMachineFunction();
11863   MachineFrameInfo &MFI = MF.getFrameInfo();
11864   MFI.setFrameAddressIsTaken(true);
11865 
11866   EVT PtrVT = getPointerTy(MF.getDataLayout());
11867   bool isPPC64 = PtrVT == MVT::i64;
11868 
11869   // Naked functions never have a frame pointer, and so we use r1. For all
11870   // other functions, this decision must be delayed until during PEI.
11871   unsigned FrameReg;
11872   if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
11873     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
11874   else
11875     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
11876 
11877   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
11878                                          PtrVT);
11879   while (Depth--)
11880     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
11881                             FrameAddr, MachinePointerInfo());
11882   return FrameAddr;
11883 }
11884 
11885 // FIXME? Maybe this could be a TableGen attribute on some registers and
11886 // this table could be generated automatically from RegInfo.
11887 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
11888                                               SelectionDAG &DAG) const {
11889   bool isPPC64 = Subtarget.isPPC64();
11890   bool isDarwinABI = Subtarget.isDarwinABI();
11891 
11892   if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
11893       (!isPPC64 && VT != MVT::i32))
11894     report_fatal_error("Invalid register global variable type");
11895 
11896   bool is64Bit = isPPC64 && VT == MVT::i64;
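  // Registers that the ABI reserves for a given configuration (e.g. the TOC
  // pointer or the thread pointer) map to 0 below and are rejected.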
11897   unsigned Reg = StringSwitch<unsigned>(RegName)
11898                    .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
11899                    .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
11900                    .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
11901                                   (is64Bit ? PPC::X13 : PPC::R13))
11902                    .Default(0);
11903 
11904   if (Reg)
11905     return Reg;
11906   report_fatal_error("Invalid register name global variable");
11907 }
11908 
11909 bool
11910 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
11911   // The PowerPC target isn't yet aware of offsets.
11912   return false;
11913 }
11914 
11915 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
11916                                            const CallInst &I,
11917                                            unsigned Intrinsic) const {
11918 
11919   switch (Intrinsic) {
11920   case Intrinsic::ppc_qpx_qvlfd:
11921   case Intrinsic::ppc_qpx_qvlfs:
11922   case Intrinsic::ppc_qpx_qvlfcd:
11923   case Intrinsic::ppc_qpx_qvlfcs:
11924   case Intrinsic::ppc_qpx_qvlfiwa:
11925   case Intrinsic::ppc_qpx_qvlfiwz:
11926   case Intrinsic::ppc_altivec_lvx:
11927   case Intrinsic::ppc_altivec_lvxl:
11928   case Intrinsic::ppc_altivec_lvebx:
11929   case Intrinsic::ppc_altivec_lvehx:
11930   case Intrinsic::ppc_altivec_lvewx:
11931   case Intrinsic::ppc_vsx_lxvd2x:
11932   case Intrinsic::ppc_vsx_lxvw4x: {
11933     EVT VT;
11934     switch (Intrinsic) {
11935     case Intrinsic::ppc_altivec_lvebx:
11936       VT = MVT::i8;
11937       break;
11938     case Intrinsic::ppc_altivec_lvehx:
11939       VT = MVT::i16;
11940       break;
11941     case Intrinsic::ppc_altivec_lvewx:
11942       VT = MVT::i32;
11943       break;
11944     case Intrinsic::ppc_vsx_lxvd2x:
11945       VT = MVT::v2f64;
11946       break;
11947     case Intrinsic::ppc_qpx_qvlfd:
11948       VT = MVT::v4f64;
11949       break;
11950     case Intrinsic::ppc_qpx_qvlfs:
11951       VT = MVT::v4f32;
11952       break;
11953     case Intrinsic::ppc_qpx_qvlfcd:
11954       VT = MVT::v2f64;
11955       break;
11956     case Intrinsic::ppc_qpx_qvlfcs:
11957       VT = MVT::v2f32;
11958       break;
11959     default:
11960       VT = MVT::v4i32;
11961       break;
11962     }
11963 
11964     Info.opc = ISD::INTRINSIC_W_CHAIN;
11965     Info.memVT = VT;
11966     Info.ptrVal = I.getArgOperand(0);
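    // These loads ignore the low-order address bits (the address is
    // effectively rounded down), so conservatively describe the memory
    // touched as the window [ptr - (size - 1), ptr + (size - 1)].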
11967     Info.offset = -VT.getStoreSize()+1;
11968     Info.size = 2*VT.getStoreSize()-1;
11969     Info.align = 1;
11970     Info.vol = false;
11971     Info.readMem = true;
11972     Info.writeMem = false;
11973     return true;
11974   }
11975   case Intrinsic::ppc_qpx_qvlfda:
11976   case Intrinsic::ppc_qpx_qvlfsa:
11977   case Intrinsic::ppc_qpx_qvlfcda:
11978   case Intrinsic::ppc_qpx_qvlfcsa:
11979   case Intrinsic::ppc_qpx_qvlfiwaa:
11980   case Intrinsic::ppc_qpx_qvlfiwza: {
11981     EVT VT;
11982     switch (Intrinsic) {
11983     case Intrinsic::ppc_qpx_qvlfda:
11984       VT = MVT::v4f64;
11985       break;
11986     case Intrinsic::ppc_qpx_qvlfsa:
11987       VT = MVT::v4f32;
11988       break;
11989     case Intrinsic::ppc_qpx_qvlfcda:
11990       VT = MVT::v2f64;
11991       break;
11992     case Intrinsic::ppc_qpx_qvlfcsa:
11993       VT = MVT::v2f32;
11994       break;
11995     default:
11996       VT = MVT::v4i32;
11997       break;
11998     }
11999 
12000     Info.opc = ISD::INTRINSIC_W_CHAIN;
12001     Info.memVT = VT;
12002     Info.ptrVal = I.getArgOperand(0);
12003     Info.offset = 0;
12004     Info.size = VT.getStoreSize();
12005     Info.align = 1;
12006     Info.vol = false;
12007     Info.readMem = true;
12008     Info.writeMem = false;
12009     return true;
12010   }
12011   case Intrinsic::ppc_qpx_qvstfd:
12012   case Intrinsic::ppc_qpx_qvstfs:
12013   case Intrinsic::ppc_qpx_qvstfcd:
12014   case Intrinsic::ppc_qpx_qvstfcs:
12015   case Intrinsic::ppc_qpx_qvstfiw:
12016   case Intrinsic::ppc_altivec_stvx:
12017   case Intrinsic::ppc_altivec_stvxl:
12018   case Intrinsic::ppc_altivec_stvebx:
12019   case Intrinsic::ppc_altivec_stvehx:
12020   case Intrinsic::ppc_altivec_stvewx:
12021   case Intrinsic::ppc_vsx_stxvd2x:
12022   case Intrinsic::ppc_vsx_stxvw4x: {
12023     EVT VT;
12024     switch (Intrinsic) {
12025     case Intrinsic::ppc_altivec_stvebx:
12026       VT = MVT::i8;
12027       break;
12028     case Intrinsic::ppc_altivec_stvehx:
12029       VT = MVT::i16;
12030       break;
12031     case Intrinsic::ppc_altivec_stvewx:
12032       VT = MVT::i32;
12033       break;
12034     case Intrinsic::ppc_vsx_stxvd2x:
12035       VT = MVT::v2f64;
12036       break;
12037     case Intrinsic::ppc_qpx_qvstfd:
12038       VT = MVT::v4f64;
12039       break;
12040     case Intrinsic::ppc_qpx_qvstfs:
12041       VT = MVT::v4f32;
12042       break;
12043     case Intrinsic::ppc_qpx_qvstfcd:
12044       VT = MVT::v2f64;
12045       break;
12046     case Intrinsic::ppc_qpx_qvstfcs:
12047       VT = MVT::v2f32;
12048       break;
12049     default:
12050       VT = MVT::v4i32;
12051       break;
12052     }
12053 
12054     Info.opc = ISD::INTRINSIC_VOID;
12055     Info.memVT = VT;
12056     Info.ptrVal = I.getArgOperand(1);
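    // As with the loads above, the address is effectively rounded down, so
    // report the conservative window around the pointer.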
12057     Info.offset = -VT.getStoreSize()+1;
12058     Info.size = 2*VT.getStoreSize()-1;
12059     Info.align = 1;
12060     Info.vol = false;
12061     Info.readMem = false;
12062     Info.writeMem = true;
12063     return true;
12064   }
12065   case Intrinsic::ppc_qpx_qvstfda:
12066   case Intrinsic::ppc_qpx_qvstfsa:
12067   case Intrinsic::ppc_qpx_qvstfcda:
12068   case Intrinsic::ppc_qpx_qvstfcsa:
12069   case Intrinsic::ppc_qpx_qvstfiwa: {
12070     EVT VT;
12071     switch (Intrinsic) {
12072     case Intrinsic::ppc_qpx_qvstfda:
12073       VT = MVT::v4f64;
12074       break;
12075     case Intrinsic::ppc_qpx_qvstfsa:
12076       VT = MVT::v4f32;
12077       break;
12078     case Intrinsic::ppc_qpx_qvstfcda:
12079       VT = MVT::v2f64;
12080       break;
12081     case Intrinsic::ppc_qpx_qvstfcsa:
12082       VT = MVT::v2f32;
12083       break;
12084     default:
12085       VT = MVT::v4i32;
12086       break;
12087     }
12088 
12089     Info.opc = ISD::INTRINSIC_VOID;
12090     Info.memVT = VT;
12091     Info.ptrVal = I.getArgOperand(1);
12092     Info.offset = 0;
12093     Info.size = VT.getStoreSize();
12094     Info.align = 1;
12095     Info.vol = false;
12096     Info.readMem = false;
12097     Info.writeMem = true;
12098     return true;
12099   }
12100   default:
12101     break;
12102   }
12103 
12104   return false;
12105 }
12106 
12107 /// getOptimalMemOpType - Returns the target specific optimal type for load
12108 /// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, it is safe to assume that the destination
/// alignment can satisfy any constraint. Similarly, if SrcAlign is zero it
/// means there isn't a need to check it against the alignment requirement,
12112 /// probably because the source does not need to be loaded. If 'IsMemset' is
12113 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
12114 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
12115 /// source is constant so it does not need to be loaded.
12116 /// It returns EVT::Other if the type should be determined using generic
12117 /// target-independent logic.
12118 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
12119                                            unsigned DstAlign, unsigned SrcAlign,
12120                                            bool IsMemset, bool ZeroMemset,
12121                                            bool MemcpyStrSrc,
12122                                            MachineFunction &MF) const {
12123   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
12124     const Function *F = MF.getFunction();
12125     // When expanding a memset, require at least two QPX instructions to cover
12126     // the cost of loading the value to be stored from the constant pool.
12127     if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
12128        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
12129         !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
12130       return MVT::v4f64;
12131     }
12132 
12133     // We should use Altivec/VSX loads and stores when available. For unaligned
12134     // addresses, unaligned VSX loads are only fast starting with the P8.
12135     if (Subtarget.hasAltivec() && Size >= 16 &&
12136         (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
12137          ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
12138       return MVT::v4i32;
12139   }
12140 
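  // Otherwise fall back to the widest legal scalar GPR type.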
12141   if (Subtarget.isPPC64()) {
12142     return MVT::i64;
12143   }
12144 
12145   return MVT::i32;
12146 }
12147 
12148 /// \brief Returns true if it is beneficial to convert a load of a constant
12149 /// to just the constant itself.
12150 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
12151                                                           Type *Ty) const {
12152   assert(Ty->isIntegerTy());
12153 
12154   unsigned BitSize = Ty->getPrimitiveSizeInBits();
12155   return !(BitSize == 0 || BitSize > 64);
12156 }
12157 
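// Truncating a 64-bit value to 32 bits is free: the 32-bit result is simply
// the low half of the 64-bit register, so no instruction is needed.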
12158 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
12159   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
12160     return false;
12161   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
12162   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
12163   return NumBits1 == 64 && NumBits2 == 32;
12164 }
12165 
12166 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
12167   if (!VT1.isInteger() || !VT2.isInteger())
12168     return false;
12169   unsigned NumBits1 = VT1.getSizeInBits();
12170   unsigned NumBits2 = VT2.getSizeInBits();
12171   return NumBits1 == 64 && NumBits2 == 32;
12172 }
12173 
12174 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
12175   // Generally speaking, zexts are not free, but they are free when they can be
12176   // folded with other operations.
12177   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
12178     EVT MemVT = LD->getMemoryVT();
12179     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
12180          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
12181         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
12182          LD->getExtensionType() == ISD::ZEXTLOAD))
12183       return true;
12184   }
12185 
12186   // FIXME: Add other cases...
12187   //  - 32-bit shifts with a zext to i64
12188   //  - zext after ctlz, bswap, etc.
12189   //  - zext after and by a constant mask
12190 
12191   return TargetLowering::isZExtFree(Val, VT2);
12192 }
12193 
12194 bool PPCTargetLowering::isFPExtFree(EVT VT) const {
12195   assert(VT.isFloatingPoint());
12196   return true;
12197 }
12198 
12199 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
12200   return isInt<16>(Imm) || isUInt<16>(Imm);
12201 }
12202 
12203 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
12204   return isInt<16>(Imm) || isUInt<16>(Imm);
12205 }
12206 
12207 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
12208                                                        unsigned,
12209                                                        unsigned,
12210                                                        bool *Fast) const {
12211   if (DisablePPCUnaligned)
12212     return false;
12213 
12214   // PowerPC supports unaligned memory access for simple non-vector types.
12215   // Although accessing unaligned addresses is not as efficient as accessing
12216   // aligned addresses, it is generally more efficient than manual expansion,
12217   // and generally only traps for software emulation when crossing page
12218   // boundaries.
12219 
12220   if (!VT.isSimple())
12221     return false;
12222 
12223   if (VT.getSimpleVT().isVector()) {
12224     if (Subtarget.hasVSX()) {
12225       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
12226           VT != MVT::v4f32 && VT != MVT::v4i32)
12227         return false;
12228     } else {
12229       return false;
12230     }
12231   }
12232 
12233   if (VT == MVT::ppcf128)
12234     return false;
12235 
12236   if (Fast)
12237     *Fast = true;
12238 
12239   return true;
12240 }
12241 
12242 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
12243   VT = VT.getScalarType();
12244 
12245   if (!VT.isSimple())
12246     return false;
12247 
12248   switch (VT.getSimpleVT().SimpleTy) {
12249   case MVT::f32:
12250   case MVT::f64:
12251     return true;
12252   default:
12253     break;
12254   }
12255 
12256   return false;
12257 }
12258 
12259 const MCPhysReg *
12260 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
12261   // LR is a callee-save register, but we must treat it as clobbered by any call
12262   // site. Hence we include LR in the scratch registers, which are in turn added
12263   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
12264   // to CTR, which is used by any indirect call.
12265   static const MCPhysReg ScratchRegs[] = {
12266     PPC::X12, PPC::LR8, PPC::CTR8, 0
12267   };
12268 
12269   return ScratchRegs;
12270 }
12271 
12272 unsigned PPCTargetLowering::getExceptionPointerRegister(
12273     const Constant *PersonalityFn) const {
12274   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
12275 }
12276 
12277 unsigned PPCTargetLowering::getExceptionSelectorRegister(
12278     const Constant *PersonalityFn) const {
12279   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
12280 }
12281 
12282 bool
12283 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
12284                      EVT VT , unsigned DefinedValues) const {
12285   if (VT == MVT::v2i64)
12286     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
12287 
12288   if (Subtarget.hasVSX() || Subtarget.hasQPX())
12289     return true;
12290 
12291   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
12292 }
12293 
12294 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
12295   if (DisableILPPref || Subtarget.enableMachineScheduler())
12296     return TargetLowering::getSchedulingPreference(N);
12297 
12298   return Sched::ILP;
12299 }
12300 
12301 // Create a fast isel object.
12302 FastISel *
12303 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
12304                                   const TargetLibraryInfo *LibInfo) const {
12305   return PPC::createFastISel(FuncInfo, LibInfo);
12306 }
12307 
12308 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
12309   if (Subtarget.isDarwinABI()) return;
12310   if (!Subtarget.isPPC64()) return;
12311 
12312   // Update IsSplitCSR in PPCFunctionInfo
12313   PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
12314   PFI->setIsSplitCSR(true);
12315 }
12316 
12317 void PPCTargetLowering::insertCopiesSplitCSR(
12318   MachineBasicBlock *Entry,
12319   const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
12320   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12321   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
12322   if (!IStart)
12323     return;
12324 
12325   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12326   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
12327   MachineBasicBlock::iterator MBBI = Entry->begin();
12328   for (const MCPhysReg *I = IStart; *I; ++I) {
12329     const TargetRegisterClass *RC = nullptr;
12330     if (PPC::G8RCRegClass.contains(*I))
12331       RC = &PPC::G8RCRegClass;
12332     else if (PPC::F8RCRegClass.contains(*I))
12333       RC = &PPC::F8RCRegClass;
12334     else if (PPC::CRRCRegClass.contains(*I))
12335       RC = &PPC::CRRCRegClass;
12336     else if (PPC::VRRCRegClass.contains(*I))
12337       RC = &PPC::VRRCRegClass;
12338     else
12339       llvm_unreachable("Unexpected register class in CSRsViaCopy!");
12340 
12341     unsigned NewVR = MRI->createVirtualRegister(RC);
12342     // Create copy from CSR to a virtual register.
12343     // FIXME: this currently does not emit CFI pseudo-instructions, it works
12344     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
12345     // nounwind. If we want to generalize this later, we may need to emit
12346     // CFI pseudo-instructions.
12347     assert(Entry->getParent()->getFunction()->hasFnAttribute(
12348              Attribute::NoUnwind) &&
12349            "Function should be nounwind in insertCopiesSplitCSR!");
12350     Entry->addLiveIn(*I);
12351     BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
12352       .addReg(*I);
12353 
12354     // Insert the copy-back instructions right before the terminator
12355     for (auto *Exit : Exits)
12356       BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
12357               TII->get(TargetOpcode::COPY), *I)
12358         .addReg(NewVR);
12359   }
12360 }
12361 
12362 // Override to enable LOAD_STACK_GUARD lowering on Linux.
12363 bool PPCTargetLowering::useLoadStackGuardNode() const {
12364   if (!Subtarget.isTargetLinux())
12365     return TargetLowering::useLoadStackGuardNode();
12366   return true;
12367 }
12368 
// Override to avoid declaring the stack-protector guard global on Linux; the
// guard is accessed through the LOAD_STACK_GUARD node instead.
12370 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
12371   if (!Subtarget.isTargetLinux())
12372     return TargetLowering::insertSSPDeclarations(M);
12373 }
12374 
12375 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
12376 
12377   if (!VT.isSimple() || !Subtarget.hasVSX())
12378     return false;
12379 
12380   switch(VT.getSimpleVT().SimpleTy) {
12381   default:
12382     // For FP types that are currently not supported by PPC backend, return
12383     // false. Examples: f16, f80.
12384     return false;
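  // +0.0 can be materialized without a load (e.g. by XORing a VSX register
  // with itself), so report it as a legal immediate for the supported types.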
12385   case MVT::f32:
12386   case MVT::f64:
12387   case MVT::ppcf128:
12388     return Imm.isPosZero();
12389   }
12390 }
12391