1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PPCISelLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPCISelLowering.h"
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPCCallingConv.h"
17 #include "PPCCCState.h"
18 #include "PPCMachineFunctionInfo.h"
19 #include "PPCPerfectShuffle.h"
20 #include "PPCTargetMachine.h"
21 #include "PPCTargetObjectFile.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/ADT/Triple.h"
26 #include "llvm/CodeGen/CallingConvLower.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineFunction.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineLoopInfo.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/SelectionDAG.h"
33 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
34 #include "llvm/IR/CallingConv.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/IR/Intrinsics.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/MathExtras.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetOptions.h"
45 #include <list>
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "ppc-lowering"
50 
51 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
52 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
53 
54 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
55 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
56 
57 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
58 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
59 
60 static cl::opt<bool> DisableSCO("disable-ppc-sco",
61 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
62 
63 STATISTIC(NumTailCalls, "Number of tail calls");
64 STATISTIC(NumSiblingCalls, "Number of sibling calls");
65 
66 // FIXME: Remove this once the bug has been fixed!
67 extern cl::opt<bool> ANDIGlueBug;
68 
69 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
70                                      const PPCSubtarget &STI)
71     : TargetLowering(TM), Subtarget(STI) {
72   // Use _setjmp/_longjmp instead of setjmp/longjmp.
73   setUseUnderscoreSetJmp(true);
74   setUseUnderscoreLongJmp(true);
75 
76   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
77   // arguments are at least 4/8 bytes aligned.
78   bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);
80 
81   // Set up the register classes.
82   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
83   if (!useSoftFloat()) {
84     addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
85     addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
86   }
87 
88   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
89   for (MVT VT : MVT::integer_valuetypes()) {
90     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
91     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
92   }
93 
94   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
95 
  // PowerPC has pre-inc loads and stores.
97   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
98   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
99   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
100   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
101   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
102   setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
103   setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
104   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
105   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
106   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
107   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
108   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
109   setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
110   setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
111 
112   if (Subtarget.useCRBits()) {
113     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
114 
115     if (isPPC64 || Subtarget.hasFPCVT()) {
116       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
117       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
118                          isPPC64 ? MVT::i64 : MVT::i32);
119       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
120       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
121                         isPPC64 ? MVT::i64 : MVT::i32);
122     } else {
123       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
124       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
125     }
126 
127     // PowerPC does not support direct load / store of condition registers
128     setOperationAction(ISD::LOAD, MVT::i1, Custom);
129     setOperationAction(ISD::STORE, MVT::i1, Custom);
130 
131     // FIXME: Remove this once the ANDI glue bug is fixed:
132     if (ANDIGlueBug)
133       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
134 
135     for (MVT VT : MVT::integer_valuetypes()) {
136       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
137       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
138       setTruncStoreAction(VT, MVT::i1, Expand);
139     }
140 
141     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
142   }
143 
144   // This is used in the ppcf128->int sequence.  Note it has different semantics
145   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
146   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
147 
148   // We do not currently implement these libm ops for PowerPC.
149   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
150   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
151   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
152   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
153   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
154   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
155 
156   // PowerPC has no SREM/UREM instructions
157   setOperationAction(ISD::SREM, MVT::i32, Expand);
158   setOperationAction(ISD::UREM, MVT::i32, Expand);
159   setOperationAction(ISD::SREM, MVT::i64, Expand);
160   setOperationAction(ISD::UREM, MVT::i64, Expand);
161 
162   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
163   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
164   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
165   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
166   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
167   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
168   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
169   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
170   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
171 
172   // We don't support sin/cos/sqrt/fmod/pow
173   setOperationAction(ISD::FSIN , MVT::f64, Expand);
174   setOperationAction(ISD::FCOS , MVT::f64, Expand);
175   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
176   setOperationAction(ISD::FREM , MVT::f64, Expand);
177   setOperationAction(ISD::FPOW , MVT::f64, Expand);
178   setOperationAction(ISD::FMA  , MVT::f64, Legal);
179   setOperationAction(ISD::FSIN , MVT::f32, Expand);
180   setOperationAction(ISD::FCOS , MVT::f32, Expand);
181   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
182   setOperationAction(ISD::FREM , MVT::f32, Expand);
183   setOperationAction(ISD::FPOW , MVT::f32, Expand);
184   setOperationAction(ISD::FMA  , MVT::f32, Legal);
185 
186   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
187 
  // Expand FSQRT unless we have a hardware square root or can use the
  // reciprocal-estimate instructions under unsafe FP math.
189   if (!Subtarget.hasFSQRT() &&
190       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
191         Subtarget.hasFRE()))
192     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
193 
194   if (!Subtarget.hasFSQRT() &&
195       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
196         Subtarget.hasFRES()))
197     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
198 
199   if (Subtarget.hasFCPSGN()) {
200     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
201     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
202   } else {
203     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
204     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
205   }
206 
207   if (Subtarget.hasFPRND()) {
208     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
209     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
210     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
211     setOperationAction(ISD::FROUND, MVT::f64, Legal);
212 
213     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
214     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
215     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
216     setOperationAction(ISD::FROUND, MVT::f32, Legal);
217   }
218 
  // PowerPC does not have BSWAP.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
221   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
222   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
223   if (Subtarget.isISA3_0()) {
224     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
225     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
226   } else {
227     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
228     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
229   }
230 
231   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
232     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
233     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
234   } else {
235     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
236     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
237   }
238 
239   // PowerPC does not have ROTR
240   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
241   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
242 
243   if (!Subtarget.useCRBits()) {
244     // PowerPC does not have Select
245     setOperationAction(ISD::SELECT, MVT::i32, Expand);
246     setOperationAction(ISD::SELECT, MVT::i64, Expand);
247     setOperationAction(ISD::SELECT, MVT::f32, Expand);
248     setOperationAction(ISD::SELECT, MVT::f64, Expand);
249   }
250 
251   // PowerPC wants to turn select_cc of FP into fsel when possible.
252   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
253   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
254 
255   // PowerPC wants to optimize integer setcc a bit
256   if (!Subtarget.useCRBits())
257     setOperationAction(ISD::SETCC, MVT::i32, Custom);
258 
259   // PowerPC does not have BRCOND which requires SetCC
260   if (!Subtarget.useCRBits())
261     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
262 
263   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
264 
265   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
266   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
267 
268   // PowerPC does not have [U|S]INT_TO_FP
269   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
270   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
271 
272   if (Subtarget.hasDirectMove() && isPPC64) {
273     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
274     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
275     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
276     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
277   } else {
278     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
279     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
280     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
281     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
282   }
283 
284   // We cannot sextinreg(i1).  Expand to shifts.
285   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
286 
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP is NOT intended to support SjLj exception
  // handling; it is a lightweight setjmp/longjmp replacement used to support
  // continuations, user-level threading, etc. As a result, no other SjLj
  // exception interfaces are implemented, so please don't build your own
  // exception handling on top of them.
292   // LLVM/Clang supports zero-cost DWARF exception handling.
293   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
294   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
295 
296   // We want to legalize GlobalAddress and ConstantPool nodes into the
297   // appropriate instructions to materialize the address.
298   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
299   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
300   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
301   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
302   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
303   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
304   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
305   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
306   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
307   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
308 
309   // TRAP is legal.
310   setOperationAction(ISD::TRAP, MVT::Other, Legal);
311 
312   // TRAMPOLINE is custom lowered.
313   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
314   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
315 
316   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
317   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
318 
319   if (Subtarget.isSVR4ABI()) {
320     if (isPPC64) {
321       // VAARG always uses double-word chunks, so promote anything smaller.
322       setOperationAction(ISD::VAARG, MVT::i1, Promote);
323       AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
324       setOperationAction(ISD::VAARG, MVT::i8, Promote);
325       AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
326       setOperationAction(ISD::VAARG, MVT::i16, Promote);
327       AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
328       setOperationAction(ISD::VAARG, MVT::i32, Promote);
329       AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
330       setOperationAction(ISD::VAARG, MVT::Other, Expand);
331     } else {
332       // VAARG is custom lowered with the 32-bit SVR4 ABI.
333       setOperationAction(ISD::VAARG, MVT::Other, Custom);
334       setOperationAction(ISD::VAARG, MVT::i64, Custom);
335     }
336   } else
337     setOperationAction(ISD::VAARG, MVT::Other, Expand);
338 
339   if (Subtarget.isSVR4ABI() && !isPPC64)
340     // VACOPY is custom lowered with the 32-bit SVR4 ABI.
341     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
342   else
343     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
344 
345   // Use the default implementation.
346   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
347   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
348   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
349   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
350   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
351   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
352   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
353   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
354   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
355 
356   // We want to custom lower some of our intrinsics.
357   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
358 
359   // To handle counter-based loop conditions.
360   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
361 
362   // Comparisons that require checking two conditions.
363   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
364   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
365   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
366   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
367   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
368   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
369   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
370   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
371   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
372   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
373   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
374   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
375 
376   if (Subtarget.has64BitSupport()) {
377     // They also have instructions for converting between i64 and fp.
378     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
379     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
380     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
381     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
382     // This is just the low 32 bits of a (signed) fp->i64 conversion.
383     // We cannot do this with Promote because i64 is not a legal type.
384     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
385 
386     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
387       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
388   } else {
389     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
390     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
391   }
392 
393   // With the instructions enabled under FPCVT, we can do everything.
394   if (Subtarget.hasFPCVT()) {
395     if (Subtarget.has64BitSupport()) {
396       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
397       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
398       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
399       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
400     }
401 
402     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
403     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
404     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
405     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
406   }
407 
408   if (Subtarget.use64BitRegs()) {
409     // 64-bit PowerPC implementations can support i64 types directly
410     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
411     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
412     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
413     // 64-bit PowerPC wants to expand i128 shifts itself.
414     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
415     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
416     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
417   } else {
418     // 32-bit PowerPC wants to expand i64 shifts itself.
419     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
420     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
421     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
422   }
423 
424   if (Subtarget.hasAltivec()) {
425     // First set operation action for all vector types to expand. Then we
426     // will selectively turn on ones that can be effectively codegen'd.
427     for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VTs.
429       setOperationAction(ISD::ADD, VT, Legal);
430       setOperationAction(ISD::SUB, VT, Legal);
431 
432       // Vector instructions introduced in P8
433       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
434         setOperationAction(ISD::CTPOP, VT, Legal);
435         setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
438         setOperationAction(ISD::CTPOP, VT, Expand);
439         setOperationAction(ISD::CTLZ, VT, Expand);
440       }
441 
442       // Vector instructions introduced in P9
443       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
444         setOperationAction(ISD::CTTZ, VT, Legal);
445       else
446         setOperationAction(ISD::CTTZ, VT, Expand);
447 
448       // We promote all shuffles to v16i8.
449       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
450       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
451 
      // We promote all element-type-agnostic operations to v4i32.
453       setOperationAction(ISD::AND   , VT, Promote);
454       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
455       setOperationAction(ISD::OR    , VT, Promote);
456       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
457       setOperationAction(ISD::XOR   , VT, Promote);
458       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
459       setOperationAction(ISD::LOAD  , VT, Promote);
460       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
461       setOperationAction(ISD::SELECT, VT, Promote);
462       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
463       setOperationAction(ISD::SELECT_CC, VT, Promote);
464       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
465       setOperationAction(ISD::STORE, VT, Promote);
466       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
467 
468       // No other operations are legal.
469       setOperationAction(ISD::MUL , VT, Expand);
470       setOperationAction(ISD::SDIV, VT, Expand);
471       setOperationAction(ISD::SREM, VT, Expand);
472       setOperationAction(ISD::UDIV, VT, Expand);
473       setOperationAction(ISD::UREM, VT, Expand);
474       setOperationAction(ISD::FDIV, VT, Expand);
475       setOperationAction(ISD::FREM, VT, Expand);
476       setOperationAction(ISD::FNEG, VT, Expand);
477       setOperationAction(ISD::FSQRT, VT, Expand);
478       setOperationAction(ISD::FLOG, VT, Expand);
479       setOperationAction(ISD::FLOG10, VT, Expand);
480       setOperationAction(ISD::FLOG2, VT, Expand);
481       setOperationAction(ISD::FEXP, VT, Expand);
482       setOperationAction(ISD::FEXP2, VT, Expand);
483       setOperationAction(ISD::FSIN, VT, Expand);
484       setOperationAction(ISD::FCOS, VT, Expand);
485       setOperationAction(ISD::FABS, VT, Expand);
486       setOperationAction(ISD::FPOWI, VT, Expand);
487       setOperationAction(ISD::FFLOOR, VT, Expand);
488       setOperationAction(ISD::FCEIL,  VT, Expand);
489       setOperationAction(ISD::FTRUNC, VT, Expand);
490       setOperationAction(ISD::FRINT,  VT, Expand);
491       setOperationAction(ISD::FNEARBYINT, VT, Expand);
492       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
493       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
494       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
495       setOperationAction(ISD::MULHU, VT, Expand);
496       setOperationAction(ISD::MULHS, VT, Expand);
497       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
498       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
499       setOperationAction(ISD::UDIVREM, VT, Expand);
500       setOperationAction(ISD::SDIVREM, VT, Expand);
501       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
502       setOperationAction(ISD::FPOW, VT, Expand);
503       setOperationAction(ISD::BSWAP, VT, Expand);
504       setOperationAction(ISD::VSELECT, VT, Expand);
505       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
506       setOperationAction(ISD::ROTL, VT, Expand);
507       setOperationAction(ISD::ROTR, VT, Expand);
508 
509       for (MVT InnerVT : MVT::vector_valuetypes()) {
510         setTruncStoreAction(VT, InnerVT, Expand);
511         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
512         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
513         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
514       }
515     }
516 
    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
518     // with merges, splats, etc.
519     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
520 
521     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
522     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
523     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
524     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
525     setOperationAction(ISD::SELECT, MVT::v4i32,
526                        Subtarget.useCRBits() ? Legal : Expand);
527     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
528     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
529     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
530     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
531     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
532     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
533     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
534     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
535     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
536 
537     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
538     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
539     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
540     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
541 
542     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
543     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
544 
545     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
546       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
547       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
548     }
549 
550     if (Subtarget.hasP8Altivec())
551       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
552     else
553       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
554 
555     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
556     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
557 
558     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
559     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
560 
561     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
562     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
563     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
564     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
565 
566     // Altivec does not contain unordered floating-point compare instructions
567     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
568     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
569     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
570     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
571 
572     if (Subtarget.hasVSX()) {
573       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
574       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
575       if (Subtarget.hasP8Vector()) {
576         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
577         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
578       }
579       if (Subtarget.hasDirectMove() && isPPC64) {
580         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
581         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
582         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
583         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
584         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
585         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
586         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
587         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
588       }
589       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
590 
591       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
592       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
593       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
594       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
595       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
596 
597       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
598 
599       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
600       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
601 
602       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
603       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
604 
605       setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
606       setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
607       setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
608       setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
609       setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
610 
611       // Share the Altivec comparison restrictions.
612       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
613       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
614       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
615       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
616 
617       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
618       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
619 
620       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
621 
622       if (Subtarget.hasP8Vector())
623         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
624 
625       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
626 
627       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
628       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
629       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
630 
631       if (Subtarget.hasP8Altivec()) {
632         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
633         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
634         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
635 
636         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
639         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
640         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
641         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
642 
643         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
644 
645         // VSX v2i64 only supports non-arithmetic operations.
646         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
647         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
648       }
649 
650       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
651       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
652       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
653       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
654 
655       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
656 
657       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
658       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
659       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
660       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
661 
662       // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, while overall legalization checks the inner type.
664       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
665       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
666       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
667       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
668 
669       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
670       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
671       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
672       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
673 
674       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
675     }
676 
677     if (Subtarget.hasP8Altivec()) {
678       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
679       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
680     }
681 
682     if (Subtarget.hasP9Vector()) {
683       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
684       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
685     }
686 
687     if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
688       setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
689   }
690 
691   if (Subtarget.hasQPX()) {
692     setOperationAction(ISD::FADD, MVT::v4f64, Legal);
693     setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
694     setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
695     setOperationAction(ISD::FREM, MVT::v4f64, Expand);
696 
697     setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
698     setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
699 
700     setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
701     setOperationAction(ISD::STORE , MVT::v4f64, Custom);
702 
703     setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
704     setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
705 
706     if (!Subtarget.useCRBits())
707       setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
708     setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
709 
710     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
711     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
712     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
713     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
714     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
715     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
716     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
717 
718     setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
719     setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
720 
721     setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
722     setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
723     setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
724 
725     setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
726     setOperationAction(ISD::FABS , MVT::v4f64, Legal);
727     setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
728     setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
729     setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
730     setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
731     setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
732     setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
733     setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
734     setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
735     setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
736 
737     setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
738     setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
739 
740     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
741     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
742 
743     addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
744 
745     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
746     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
747     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
748     setOperationAction(ISD::FREM, MVT::v4f32, Expand);
749 
750     setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
751     setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
752 
753     setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
754     setOperationAction(ISD::STORE , MVT::v4f32, Custom);
755 
756     if (!Subtarget.useCRBits())
757       setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
758     setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
759 
760     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
761     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
762     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
763     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
764     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
765     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
766     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
767 
768     setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
769     setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
770 
771     setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
772     setOperationAction(ISD::FABS , MVT::v4f32, Legal);
773     setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
774     setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
775     setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
776     setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
777     setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
778     setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
779     setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
780     setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
781     setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
782 
783     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
784     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
785 
786     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
787     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
788 
789     addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
790 
791     setOperationAction(ISD::AND , MVT::v4i1, Legal);
792     setOperationAction(ISD::OR , MVT::v4i1, Legal);
793     setOperationAction(ISD::XOR , MVT::v4i1, Legal);
794 
795     if (!Subtarget.useCRBits())
796       setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
797     setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
798 
799     setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
800     setOperationAction(ISD::STORE , MVT::v4i1, Custom);
801 
802     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
803     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
804     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
805     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
806     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
807     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
808     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
809 
810     setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
811     setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
812 
813     addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
814 
815     setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
816     setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
817     setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
818     setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
819 
820     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
821     setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
822     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
823     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
824 
825     setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
826     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
827 
828     // These need to set FE_INEXACT, and so cannot be vectorized here.
829     setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
830     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
831 
832     if (TM.Options.UnsafeFPMath) {
833       setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
834       setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
835 
836       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
837       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
838     } else {
839       setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
840       setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
841 
842       setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
843       setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
844     }
845   }
846 
847   if (Subtarget.has64BitSupport())
848     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
849 
850   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
851 
852   if (!isPPC64) {
853     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
854     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
855   }
856 
857   setBooleanContents(ZeroOrOneBooleanContent);
858 
859   if (Subtarget.hasAltivec()) {
860     // Altivec instructions set fields to all zeros or all ones.
861     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
862   }
863 
864   if (!isPPC64) {
865     // These libcalls are not available in 32-bit.
866     setLibcallName(RTLIB::SHL_I128, nullptr);
867     setLibcallName(RTLIB::SRL_I128, nullptr);
868     setLibcallName(RTLIB::SRA_I128, nullptr);
869   }
870 
871   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
872 
873   // We have target-specific dag combine patterns for the following nodes:
874   setTargetDAGCombine(ISD::SINT_TO_FP);
875   setTargetDAGCombine(ISD::BUILD_VECTOR);
876   if (Subtarget.hasFPCVT())
877     setTargetDAGCombine(ISD::UINT_TO_FP);
878   setTargetDAGCombine(ISD::LOAD);
879   setTargetDAGCombine(ISD::STORE);
880   setTargetDAGCombine(ISD::BR_CC);
881   if (Subtarget.useCRBits())
882     setTargetDAGCombine(ISD::BRCOND);
883   setTargetDAGCombine(ISD::BSWAP);
884   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
885   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
886   setTargetDAGCombine(ISD::INTRINSIC_VOID);
887 
888   setTargetDAGCombine(ISD::SIGN_EXTEND);
889   setTargetDAGCombine(ISD::ZERO_EXTEND);
890   setTargetDAGCombine(ISD::ANY_EXTEND);
891 
892   if (Subtarget.useCRBits()) {
893     setTargetDAGCombine(ISD::TRUNCATE);
894     setTargetDAGCombine(ISD::SETCC);
895     setTargetDAGCombine(ISD::SELECT_CC);
896   }
897 
898   // Use reciprocal estimates.
899   if (TM.Options.UnsafeFPMath) {
900     setTargetDAGCombine(ISD::FDIV);
901     setTargetDAGCombine(ISD::FSQRT);
902   }
903 
904   // Darwin long double math library functions have $LDBL128 appended.
905   if (Subtarget.isDarwin()) {
906     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
907     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
908     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
909     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
910     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
911     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
912     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
913     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
914     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
915     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
916   }
917 
918   // With 32 condition bits, we don't need to sink (and duplicate) compares
919   // aggressively in CodeGenPrep.
920   if (Subtarget.useCRBits()) {
921     setHasMultipleConditionRegisters();
922     setJumpIsExpensive();
923   }
924 
925   setMinFunctionAlignment(2);
926   if (Subtarget.isDarwin())
927     setPrefFunctionAlignment(4);
928 
929   switch (Subtarget.getDarwinDirective()) {
930   default: break;
931   case PPC::DIR_970:
932   case PPC::DIR_A2:
933   case PPC::DIR_E500mc:
934   case PPC::DIR_E5500:
935   case PPC::DIR_PWR4:
936   case PPC::DIR_PWR5:
937   case PPC::DIR_PWR5X:
938   case PPC::DIR_PWR6:
939   case PPC::DIR_PWR6X:
940   case PPC::DIR_PWR7:
941   case PPC::DIR_PWR8:
942   case PPC::DIR_PWR9:
943     setPrefFunctionAlignment(4);
944     setPrefLoopAlignment(4);
945     break;
946   }
947 
948   if (Subtarget.enableMachineScheduler())
949     setSchedulingPreference(Sched::Source);
950   else
951     setSchedulingPreference(Sched::Hybrid);
952 
953   computeRegisterProperties(STI.getRegisterInfo());
954 
955   // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
957   if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
958       Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
959     MaxStoresPerMemset = 32;
960     MaxStoresPerMemsetOptSize = 16;
961     MaxStoresPerMemcpy = 32;
962     MaxStoresPerMemcpyOptSize = 8;
963     MaxStoresPerMemmove = 32;
964     MaxStoresPerMemmoveOptSize = 8;
965   } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
966     // The A2 also benefits from (very) aggressive inlining of memcpy and
  // friends. The overhead of a function call, even when warm, can be
968     // over one hundred cycles.
969     MaxStoresPerMemset = 128;
970     MaxStoresPerMemcpy = 128;
971     MaxStoresPerMemmove = 128;
972   }
973 }
974 
975 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
976 /// the desired ByVal argument alignment.
977 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
978                              unsigned MaxMaxAlign) {
979   if (MaxAlign == MaxMaxAlign)
980     return;
981   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
982     if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
983       MaxAlign = 32;
984     else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
985       MaxAlign = 16;
986   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
987     unsigned EltAlign = 0;
988     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
989     if (EltAlign > MaxAlign)
990       MaxAlign = EltAlign;
991   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
992     for (auto *EltTy : STy->elements()) {
993       unsigned EltAlign = 0;
994       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
995       if (EltAlign > MaxAlign)
996         MaxAlign = EltAlign;
997       if (MaxAlign == MaxMaxAlign)
998         break;
999     }
1000   }
1001 }
1002 
1003 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1004 /// function arguments in the caller parameter area.
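/// For example, an aggregate containing a 256-bit vector member is aligned to
/// 32 bytes on a QPX subtarget, but only to 16 bytes with Altivec alone.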
1005 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1006                                                   const DataLayout &DL) const {
  // Darwin passes everything on a 4-byte boundary.
1008   if (Subtarget.isDarwin())
1009     return 4;
1010 
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // Everything else is aligned to 8 bytes on PPC64 and 4 bytes on PPC32.
1013   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1014   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1015     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1016   return Align;
1017 }
1018 
1019 bool PPCTargetLowering::useSoftFloat() const {
1020   return Subtarget.useSoftFloat();
1021 }
1022 
1023 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1024   switch ((PPCISD::NodeType)Opcode) {
1025   case PPCISD::FIRST_NUMBER:    break;
1026   case PPCISD::FSEL:            return "PPCISD::FSEL";
1027   case PPCISD::FCFID:           return "PPCISD::FCFID";
1028   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1029   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1030   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1031   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1032   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1033   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1034   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1035   case PPCISD::FRE:             return "PPCISD::FRE";
1036   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1037   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1038   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
1039   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
1040   case PPCISD::VPERM:           return "PPCISD::VPERM";
1041   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1042   case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
1043   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1044   case PPCISD::CMPB:            return "PPCISD::CMPB";
1045   case PPCISD::Hi:              return "PPCISD::Hi";
1046   case PPCISD::Lo:              return "PPCISD::Lo";
1047   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1048   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1049   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1050   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1051   case PPCISD::SRL:             return "PPCISD::SRL";
1052   case PPCISD::SRA:             return "PPCISD::SRA";
1053   case PPCISD::SHL:             return "PPCISD::SHL";
1054   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1055   case PPCISD::CALL:            return "PPCISD::CALL";
1056   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1057   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1058   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1059   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1060   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1061   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1062   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1063   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1064   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1065   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1066   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1067   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1068   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1069   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1070   case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
1071   case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
1072   case PPCISD::VCMP:            return "PPCISD::VCMP";
1073   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
1074   case PPCISD::LBRX:            return "PPCISD::LBRX";
1075   case PPCISD::STBRX:           return "PPCISD::STBRX";
1076   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1077   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1078   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1079   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1080   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1081   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1082   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1083   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1084   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1085   case PPCISD::BDZ:             return "PPCISD::BDZ";
1086   case PPCISD::MFFS:            return "PPCISD::MFFS";
1087   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1088   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1089   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1090   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1091   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1092   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1093   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1094   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1095   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1096   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1097   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1098   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1099   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1100   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1101   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1102   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1103   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1104   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1105   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1106   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1107   case PPCISD::SC:              return "PPCISD::SC";
1108   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1109   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1110   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1111   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1112   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1113   case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
1114   case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
1115   case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
1116   case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
1117   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
1118   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
1119   }
1120   return nullptr;
1121 }
1122 
1123 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1124                                           EVT VT) const {
1125   if (!VT.isVector())
1126     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1127 
1128   if (Subtarget.hasQPX())
1129     return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1130 
1131   return VT.changeVectorElementTypeToInteger();
1132 }
1133 
1134 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1135   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1136   return true;
1137 }
1138 
1139 //===----------------------------------------------------------------------===//
1140 // Node matching predicates, for use by the tblgen matching code.
1141 //===----------------------------------------------------------------------===//
1142 
1143 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1144 static bool isFloatingPointZero(SDValue Op) {
1145   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1146     return CFP->getValueAPF().isZero();
1147   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1148     // Maybe this has already been legalized into the constant pool?
1149     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1150       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1151         return CFP->getValueAPF().isZero();
1152   }
1153   return false;
1154 }
1155 
1156 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1157 /// true if Op is undef or if it matches the specified value.
1158 static bool isConstantOrUndef(int Op, int Val) {
1159   return Op < 0 || Op == Val;
1160 }
1161 
1162 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1163 /// VPKUHUM instruction.
1164 /// The ShuffleKind distinguishes between big-endian operations with
1165 /// two different inputs (0), either-endian operations with two identical
1166 /// inputs (1), and little-endian operations with two different inputs (2).
1167 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
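/// For example, in the big-endian two-input case (ShuffleKind 0), the mask
/// must be <1,3,5,...,31>: the low-order byte of each halfword.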
1168 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1169                                SelectionDAG &DAG) {
1170   bool IsLE = DAG.getDataLayout().isLittleEndian();
1171   if (ShuffleKind == 0) {
1172     if (IsLE)
1173       return false;
1174     for (unsigned i = 0; i != 16; ++i)
1175       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1176         return false;
1177   } else if (ShuffleKind == 2) {
1178     if (!IsLE)
1179       return false;
1180     for (unsigned i = 0; i != 16; ++i)
1181       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1182         return false;
1183   } else if (ShuffleKind == 1) {
1184     unsigned j = IsLE ? 0 : 1;
1185     for (unsigned i = 0; i != 8; ++i)
1186       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1187           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1188         return false;
1189   }
1190   return true;
1191 }
1192 
1193 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1194 /// VPKUWUM instruction.
1195 /// The ShuffleKind distinguishes between big-endian operations with
1196 /// two different inputs (0), either-endian operations with two identical
1197 /// inputs (1), and little-endian operations with two different inputs (2).
1198 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
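/// For example, in the big-endian two-input case (ShuffleKind 0), the mask
/// must be <2,3,6,7,...,30,31>: the low-order halfword of each word.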
1199 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1200                                SelectionDAG &DAG) {
1201   bool IsLE = DAG.getDataLayout().isLittleEndian();
1202   if (ShuffleKind == 0) {
1203     if (IsLE)
1204       return false;
1205     for (unsigned i = 0; i != 16; i += 2)
1206       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1207           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1208         return false;
1209   } else if (ShuffleKind == 2) {
1210     if (!IsLE)
1211       return false;
1212     for (unsigned i = 0; i != 16; i += 2)
1213       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1214           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1215         return false;
1216   } else if (ShuffleKind == 1) {
1217     unsigned j = IsLE ? 0 : 2;
1218     for (unsigned i = 0; i != 8; i += 2)
1219       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1220           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1221           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1222           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1223         return false;
1224   }
1225   return true;
1226 }
1227 
1228 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1229 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1230 /// current subtarget.
1231 ///
1232 /// The ShuffleKind distinguishes between big-endian operations with
1233 /// two different inputs (0), either-endian operations with two identical
1234 /// inputs (1), and little-endian operations with two different inputs (2).
1235 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
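/// For example, with ShuffleKind 0 (big-endian, two inputs) the expected mask
/// is <4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31>, i.e. the four low-order
/// bytes of every doubleword of both inputs.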
1236 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1237                                SelectionDAG &DAG) {
1238   const PPCSubtarget& Subtarget =
1239     static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1240   if (!Subtarget.hasP8Vector())
1241     return false;
1242 
1243   bool IsLE = DAG.getDataLayout().isLittleEndian();
1244   if (ShuffleKind == 0) {
1245     if (IsLE)
1246       return false;
1247     for (unsigned i = 0; i != 16; i += 4)
1248       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1249           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1250           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1251           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1252         return false;
1253   } else if (ShuffleKind == 2) {
1254     if (!IsLE)
1255       return false;
1256     for (unsigned i = 0; i != 16; i += 4)
1257       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1258           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1259           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1260           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1261         return false;
1262   } else if (ShuffleKind == 1) {
1263     unsigned j = IsLE ? 0 : 4;
1264     for (unsigned i = 0; i != 8; i += 4)
1265       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1266           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1267           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1268           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1269           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1270           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1271           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1272           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1273         return false;
1274   }
1275   return true;
1276 }
1277 
1278 /// isVMerge - Common function, used to match vmrg* shuffles.
1279 ///
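/// For example, isVMerge(N, 4, 8, 24) checks for the big-endian vmrglw
/// pattern <8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31>.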
1280 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1281                      unsigned LHSStart, unsigned RHSStart) {
1282   if (N->getValueType(0) != MVT::v16i8)
1283     return false;
1284   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1285          "Unsupported merge size!");
1286 
1287   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1288     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1289       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1290                              LHSStart+j+i*UnitSize) ||
1291           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1292                              RHSStart+j+i*UnitSize))
1293         return false;
1294     }
1295   return true;
1296 }
1297 
1298 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1299 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1300 /// The ShuffleKind distinguishes between big-endian merges with two
1301 /// different inputs (0), either-endian merges with two identical inputs (1),
1302 /// and little-endian merges with two different inputs (2).  For the latter,
1303 /// the input operands are swapped (see PPCInstrAltivec.td).
1304 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1305                              unsigned ShuffleKind, SelectionDAG &DAG) {
1306   if (DAG.getDataLayout().isLittleEndian()) {
1307     if (ShuffleKind == 1) // unary
1308       return isVMerge(N, UnitSize, 0, 0);
1309     else if (ShuffleKind == 2) // swapped
1310       return isVMerge(N, UnitSize, 0, 16);
1311     else
1312       return false;
1313   } else {
1314     if (ShuffleKind == 1) // unary
1315       return isVMerge(N, UnitSize, 8, 8);
1316     else if (ShuffleKind == 0) // normal
1317       return isVMerge(N, UnitSize, 8, 24);
1318     else
1319       return false;
1320   }
1321 }
1322 
1323 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1324 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1325 /// The ShuffleKind distinguishes between big-endian merges with two
1326 /// different inputs (0), either-endian merges with two identical inputs (1),
1327 /// and little-endian merges with two different inputs (2).  For the latter,
1328 /// the input operands are swapped (see PPCInstrAltivec.td).
1329 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1330                              unsigned ShuffleKind, SelectionDAG &DAG) {
1331   if (DAG.getDataLayout().isLittleEndian()) {
1332     if (ShuffleKind == 1) // unary
1333       return isVMerge(N, UnitSize, 8, 8);
1334     else if (ShuffleKind == 2) // swapped
1335       return isVMerge(N, UnitSize, 8, 24);
1336     else
1337       return false;
1338   } else {
1339     if (ShuffleKind == 1) // unary
1340       return isVMerge(N, UnitSize, 0, 0);
1341     else if (ShuffleKind == 0) // normal
1342       return isVMerge(N, UnitSize, 0, 16);
1343     else
1344       return false;
1345   }
1346 }
1347 
1348 /**
1349  * \brief Common function used to match vmrgew and vmrgow shuffles
1350  *
1351  * The indexOffset determines whether to look for even or odd words in
1352  * the shuffle mask. This is based on the endianness of the target
1353  * machine.
1354  *   - Little Endian:
1355  *     - Use offset of 0 to check for odd elements
1356  *     - Use offset of 4 to check for even elements
1357  *   - Big Endian:
1358  *     - Use offset of 0 to check for even elements
1359  *     - Use offset of 4 to check for odd elements
1360  * A detailed description of the vector element ordering for little endian and
1361  * big endian can be found at
1362  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1363  * Targeting your applications - what little endian and big endian IBM XL C/C++
1364  * compiler differences mean to you
1365  *
1366  * The mask to the shuffle vector instruction specifies the indices of the
1367  * elements from the two input vectors to place in the result. The elements are
1368  * numbered in array-access order, starting with the first vector. These vectors
1369  * are always of type v16i8, thus each vector will contain 16 elements, each
1370  * 8 bits in size. More information on the shufflevector instruction can be
1371  * found in the LLVM Language Reference:
1372  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1373  *
1374  * The RHSStartValue indicates whether the same input vectors are used (unary)
1375  * or two different input vectors are used, based on the following:
1376  *   - If the instruction uses the same vector for both inputs, the range of the
1377  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1378  *     be 0.
1379  *   - If the instruction has two different vectors then the range of the
1380  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1381  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1382  *     to 31 specify elements in the second vector).
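 *
 * For example, with IndexOffset 0 and RHSStartValue 16 the expected mask is
 * <0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27>, i.e. the even words of both
 * inputs interleaved (the big-endian vmrgew pattern).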
1383  *
1384  * \param[in] N The shuffle vector SD Node to analyze
1385  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1386  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1387  * vector to the shuffle_vector instruction
1388  * \return true iff this shuffle vector represents an even or odd word merge
1389  */
1390 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1391                      unsigned RHSStartValue) {
1392   if (N->getValueType(0) != MVT::v16i8)
1393     return false;
1394 
1395   for (unsigned i = 0; i < 2; ++i)
1396     for (unsigned j = 0; j < 4; ++j)
1397       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1398                              i*RHSStartValue+j+IndexOffset) ||
1399           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1400                              i*RHSStartValue+j+IndexOffset+8))
1401         return false;
1402   return true;
1403 }
1404 
1405 /**
1406  * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
1407  * vmrgow instructions.
1408  *
1409  * \param[in] N The shuffle vector SD Node to analyze
1410  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1411  * \param[in] ShuffleKind Identify the type of merge:
1412  *   - 0 = big-endian merge with two different inputs;
1413  *   - 1 = either-endian merge with two identical inputs;
1414  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1415  *     little-endian merges).
1416  * \param[in] DAG The current SelectionDAG
1417  * \return true iff this shuffle mask is suitable for the requested vmrgew or vmrgow merge
1418  */
1419 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1420                               unsigned ShuffleKind, SelectionDAG &DAG) {
1421   if (DAG.getDataLayout().isLittleEndian()) {
1422     unsigned indexOffset = CheckEven ? 4 : 0;
1423     if (ShuffleKind == 1) // Unary
1424       return isVMerge(N, indexOffset, 0);
1425     else if (ShuffleKind == 2) // swapped
1426       return isVMerge(N, indexOffset, 16);
1427     else
1428       return false;
1429   }
1430   else {
1431     unsigned indexOffset = CheckEven ? 0 : 4;
1432     if (ShuffleKind == 1) // Unary
1433       return isVMerge(N, indexOffset, 0);
1434     else if (ShuffleKind == 0) // Normal
1435       return isVMerge(N, indexOffset, 16);
1436     else
1437       return false;
1438   }
1439   return false;
1440 }
1441 
1442 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1443 /// amount, otherwise return -1.
1444 /// The ShuffleKind distinguishes between big-endian operations with two
1445 /// different inputs (0), either-endian operations with two identical inputs
1446 /// (1), and little-endian operations with two different inputs (2).  For the
1447 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
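/// For example, with ShuffleKind 0 (big-endian, two inputs) the mask
/// <3,4,5,...,18> yields a shift amount of 3; when the target is little-endian
/// the returned amount is 16 minus the offset implied by the mask.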
1448 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1449                              SelectionDAG &DAG) {
1450   if (N->getValueType(0) != MVT::v16i8)
1451     return -1;
1452 
1453   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1454 
1455   // Find the first non-undef value in the shuffle mask.
1456   unsigned i;
1457   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1458     /*search*/;
1459 
1460   if (i == 16) return -1;  // all undef.
1461 
1462   // Otherwise, check to see if the rest of the elements are consecutively
1463   // numbered from this value.
1464   unsigned ShiftAmt = SVOp->getMaskElt(i);
1465   if (ShiftAmt < i) return -1;
1466 
1467   ShiftAmt -= i;
1468   bool isLE = DAG.getDataLayout().isLittleEndian();
1469 
1470   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1471     // Check the rest of the elements to see if they are consecutive.
1472     for (++i; i != 16; ++i)
1473       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1474         return -1;
1475   } else if (ShuffleKind == 1) {
1476     // Check the rest of the elements to see if they are consecutive.
1477     for (++i; i != 16; ++i)
1478       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1479         return -1;
1480   } else
1481     return -1;
1482 
1483   if (isLE)
1484     ShiftAmt = 16 - ShiftAmt;
1485 
1486   return ShiftAmt;
1487 }
1488 
1489 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1490 /// specifies a splat of a single element that is suitable for input to
1491 /// VSPLTB/VSPLTH/VSPLTW.
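/// For example, the mask <4,5,6,7,4,5,6,7,4,5,6,7,4,5,6,7> is a splat of word
/// element 1 and is accepted for EltSize 4.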
1492 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1493   assert(N->getValueType(0) == MVT::v16i8 &&
1494          (EltSize == 1 || EltSize == 2 || EltSize == 4));
1495 
1496   // The consecutive indices need to specify an element, not part of two
1497   // different elements.  So abandon ship early if this isn't the case.
1498   if (N->getMaskElt(0) % EltSize != 0)
1499     return false;
1500 
1501   // This is a splat operation if each element of the permute is the same, and
1502   // if the value doesn't reference the second vector.
1503   unsigned ElementBase = N->getMaskElt(0);
1504 
1505   // FIXME: Handle UNDEF elements too!
1506   if (ElementBase >= 16)
1507     return false;
1508 
1509   // Check that the indices are consecutive, in the case of a multi-byte element
1510   // splatted with a v16i8 mask.
1511   for (unsigned i = 1; i != EltSize; ++i)
1512     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1513       return false;
1514 
1515   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1516     if (N->getMaskElt(i) < 0) continue;
1517     for (unsigned j = 0; j != EltSize; ++j)
1518       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1519         return false;
1520   }
1521   return true;
1522 }
1523 
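/// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE keeps three of the
/// four words of one input in place and takes the remaining word from the
/// other input (or, when the second operand is undef, from another word of
/// the same input).  On success, ShiftElts holds the word rotation needed to
/// line up the source word, InsertAtByte the byte offset at which it is
/// inserted, and Swap whether the inputs must be swapped.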
1524 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1525                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1526 
1527   // Check that the mask is shuffling words
1528   for (unsigned i = 0; i < 4; ++i) {
1529     unsigned B0 = N->getMaskElt(i*4);
1530     unsigned B1 = N->getMaskElt(i*4+1);
1531     unsigned B2 = N->getMaskElt(i*4+2);
1532     unsigned B3 = N->getMaskElt(i*4+3);
1533     if (B0 % 4)
1534       return false;
1535     if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
1536       return false;
1537   }
1538 
1539   // Now we look at mask elements 0,4,8,12
1540   unsigned M0 = N->getMaskElt(0) / 4;
1541   unsigned M1 = N->getMaskElt(4) / 4;
1542   unsigned M2 = N->getMaskElt(8) / 4;
1543   unsigned M3 = N->getMaskElt(12) / 4;
1544   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1545   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1546 
1547   // Below, let H and L be arbitrary elements of the shuffle mask
1548   // where H is in the range [4,7] and L is in the range [0,3].
1549   // H, 1, 2, 3 or L, 5, 6, 7
1550   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1551       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1552     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1553     InsertAtByte = IsLE ? 12 : 0;
1554     Swap = M0 < 4;
1555     return true;
1556   }
1557   // 0, H, 2, 3 or 4, L, 6, 7
1558   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1559       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1560     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1561     InsertAtByte = IsLE ? 8 : 4;
1562     Swap = M1 < 4;
1563     return true;
1564   }
1565   // 0, 1, H, 3 or 4, 5, L, 7
1566   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1567       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1568     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1569     InsertAtByte = IsLE ? 4 : 8;
1570     Swap = M2 < 4;
1571     return true;
1572   }
1573   // 0, 1, 2, H or 4, 5, 6, L
1574   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1575       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1576     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1577     InsertAtByte = IsLE ? 0 : 12;
1578     Swap = M3 < 4;
1579     return true;
1580   }
1581 
1582   // If both vector operands for the shuffle are the same vector, the mask will
1583   // contain only elements from the first one and the second one will be undef.
1584   if (N->getOperand(1).isUndef()) {
1585     ShiftElts = 0;
1586     Swap = true;
1587     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1588     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1589       InsertAtByte = IsLE ? 12 : 0;
1590       return true;
1591     }
1592     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1593       InsertAtByte = IsLE ? 8 : 4;
1594       return true;
1595     }
1596     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1597       InsertAtByte = IsLE ? 4 : 8;
1598       return true;
1599     }
1600     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1601       InsertAtByte = IsLE ? 0 : 12;
1602       return true;
1603     }
1604   }
1605 
1606   return false;
1607 }
1608 
1609 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1610 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
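/// For example, a splat of bytes 4-7 with EltSize 4 yields 1 on big-endian
/// targets and 2 on little-endian targets.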
1611 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1612                                 SelectionDAG &DAG) {
1613   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1614   assert(isSplatShuffleMask(SVOp, EltSize));
1615   if (DAG.getDataLayout().isLittleEndian())
1616     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1617   else
1618     return SVOp->getMaskElt(0) / EltSize;
1619 }
1620 
1621 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1622 /// by using a vspltis[bhw] instruction of the specified element size, return
1623 /// the constant being splatted.  The ByteSize field indicates the number of
1624 /// bytes of each element [124] -> [bhw].
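/// For example, a v8i16 build_vector whose operands are all the constant 5,
/// queried with ByteSize 2, returns the target constant 5 (splattable with
/// vspltish 5).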
1625 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1626   SDValue OpVal(nullptr, 0);
1627 
1628   // If ByteSize of the splat is bigger than the element size of the
1629   // build_vector, then we have a case where we are checking for a splat where
1630   // multiple elements of the buildvector are folded together into a single
1631 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
1632   unsigned EltSize = 16/N->getNumOperands();
1633   if (EltSize < ByteSize) {
1634     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
1635     SDValue UniquedVals[4];
1636     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1637 
1638     // See if the corresponding elements of each chunk in the buildvector agree.
1639     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1640       if (N->getOperand(i).isUndef()) continue;
1641       // If the element isn't a constant, bail fully out.
1642       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
1643
1645       if (!UniquedVals[i&(Multiple-1)].getNode())
1646         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
1647       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
1648         return SDValue();  // no match.
1649     }
1650 
1651     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
1652     // either constant or undef values that are identical for each chunk.  See
1653     // if these chunks can form into a larger vspltis*.
1654 
1655     // Check to see if all of the leading entries are either 0 or -1.  If
1656     // neither, then this won't fit into the immediate field.
1657     bool LeadingZero = true;
1658     bool LeadingOnes = true;
1659     for (unsigned i = 0; i != Multiple-1; ++i) {
1660       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
1661 
1662       LeadingZero &= isNullConstant(UniquedVals[i]);
1663       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
1664     }
1665     // Finally, check the least significant entry.
1666     if (LeadingZero) {
1667       if (!UniquedVals[Multiple-1].getNode())
1668         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
1669       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
1670       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
1671         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1672     }
1673     if (LeadingOnes) {
1674       if (!UniquedVals[Multiple-1].getNode())
1675         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
1676       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
1677       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
1678         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1679     }
1680 
1681     return SDValue();
1682   }
1683 
1684   // Check to see if this buildvec has a single non-undef value in its elements.
1685   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1686     if (N->getOperand(i).isUndef()) continue;
1687     if (!OpVal.getNode())
1688       OpVal = N->getOperand(i);
1689     else if (OpVal != N->getOperand(i))
1690       return SDValue();
1691   }
1692 
1693   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
1694 
1695   unsigned ValSizeInBytes = EltSize;
1696   uint64_t Value = 0;
1697   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1698     Value = CN->getZExtValue();
1699   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1700     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
1701     Value = FloatToBits(CN->getValueAPF().convertToFloat());
1702   }
1703 
1704   // If the splat value is larger than the element value, then we can never do
1705   // this splat.  The only value whose replicated bits could fit into our
1706   // immediate field is zero, and we prefer to use vxor for that case.
1707   if (ValSizeInBytes < ByteSize) return SDValue();
1708 
1709   // If the element value is larger than the splat value, check if it consists
1710   // of a repeated bit pattern of size ByteSize.
1711   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
1712     return SDValue();
1713 
1714   // Properly sign extend the value.
1715   int MaskVal = SignExtend32(Value, ByteSize * 8);
1716 
1717   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
1718   if (MaskVal == 0) return SDValue();
1719 
1720   // Finally, if this value fits in a 5 bit sext field, return it
1721   if (SignExtend32<5>(MaskVal) == MaskVal)
1722     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
1723   return SDValue();
1724 }
1725 
1726 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1727 /// amount, otherwise return -1.
1728 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
1729   EVT VT = N->getValueType(0);
1730   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
1731     return -1;
1732 
1733   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1734 
1735   // Find the first non-undef value in the shuffle mask.
1736   unsigned i;
1737   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
1738     /*search*/;
1739 
1740   if (i == 4) return -1;  // all undef.
1741 
1742   // Otherwise, check to see if the rest of the elements are consecutively
1743   // numbered from this value.
1744   unsigned ShiftAmt = SVOp->getMaskElt(i);
1745   if (ShiftAmt < i) return -1;
1746   ShiftAmt -= i;
1747 
1748   // Check the rest of the elements to see if they are consecutive.
1749   for (++i; i != 4; ++i)
1750     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1751       return -1;
1752 
1753   return ShiftAmt;
1754 }
1755 
1756 //===----------------------------------------------------------------------===//
1757 //  Addressing Mode Selection
1758 //===----------------------------------------------------------------------===//
1759 
1760 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
1761 /// or 64-bit immediate, and if the value can be accurately represented as a
1762 /// sign extension from a 16-bit value.  If so, this returns true and stores
1763 /// the immediate in Imm.
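/// For example, an i64 constant of -4 returns true with Imm == -4, while
/// 0x12345 does not fit in 16 bits and returns false.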
1764 static bool isIntS16Immediate(SDNode *N, short &Imm) {
1765   if (!isa<ConstantSDNode>(N))
1766     return false;
1767 
1768   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
1769   if (N->getValueType(0) == MVT::i32)
1770     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
1771   else
1772     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
1773 }
1774 static bool isIntS16Immediate(SDValue Op, short &Imm) {
1775   return isIntS16Immediate(Op.getNode(), Imm);
1776 }
1777 
1778 /// SelectAddressRegReg - Given the specified address, check to see if it
1779 /// can be represented as an indexed [r+r] operation.  Returns false if it
1780 /// can be more efficiently represented with [r+imm].
1781 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
1782                                             SDValue &Index,
1783                                             SelectionDAG &DAG) const {
1784   short imm = 0;
1785   if (N.getOpcode() == ISD::ADD) {
1786     if (isIntS16Immediate(N.getOperand(1), imm))
1787       return false;    // r+i
1788     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
1789       return false;    // r+i
1790 
1791     Base = N.getOperand(0);
1792     Index = N.getOperand(1);
1793     return true;
1794   } else if (N.getOpcode() == ISD::OR) {
1795     if (isIntS16Immediate(N.getOperand(1), imm))
1796       return false;    // r+i; prefer the reg+imm form when the immediate fits.
1797 
1798     // If this is an or of disjoint bitfields, we can codegen this as an add
1799     // (for better address arithmetic) if the LHS and RHS of the OR are provably
1800     // disjoint.
1801     APInt LHSKnownZero, LHSKnownOne;
1802     APInt RHSKnownZero, RHSKnownOne;
1803     DAG.computeKnownBits(N.getOperand(0),
1804                          LHSKnownZero, LHSKnownOne);
1805 
1806     if (LHSKnownZero.getBoolValue()) {
1807       DAG.computeKnownBits(N.getOperand(1),
1808                            RHSKnownZero, RHSKnownOne);
1809       // If all of the bits are known zero on the LHS or RHS, the add won't
1810       // carry.
1811       if (~(LHSKnownZero | RHSKnownZero) == 0) {
1812         Base = N.getOperand(0);
1813         Index = N.getOperand(1);
1814         return true;
1815       }
1816     }
1817   }
1818 
1819   return false;
1820 }
1821 
1822 // If we happen to be doing an i64 load or store into a stack slot that has
1823 // less than a 4-byte alignment, then the frame-index elimination may need to
1824 // use an indexed load or store instruction (because the offset may not be a
1825 // multiple of 4). The extra register needed to hold the offset comes from the
1826 // register scavenger, and it is possible that the scavenger will need to use
1827 // an emergency spill slot. As a result, we need to make sure that a spill slot
1828 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
1829 // stack slot.
1830 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
1831   // FIXME: This does not handle the LWA case.
1832   if (VT != MVT::i64)
1833     return;
1834 
1835   // NOTE: We'll exclude negative FIs here, which come from argument
1836   // lowering, because there are no known test cases triggering this problem
1837   // using packed structures (or similar). We can remove this exclusion if
1838   // we find such a test case. The reason why this is so test-case driven is
1839   // because this entire 'fixup' is only to prevent crashes (from the
1840   // register scavenger) on not-really-valid inputs. For example, if we have:
1841   //   %a = alloca i1
1842   //   %b = bitcast i1* %a to i64*
1843   //   store i64 %val, i64* %b
1844   // then the store should really be marked as 'align 1', but is not. If it
1845   // were marked as 'align 1' then the indexed form would have been
1846   // instruction-selected initially, and the problem this 'fixup' is preventing
1847   // won't happen regardless.
1848   if (FrameIdx < 0)
1849     return;
1850 
1851   MachineFunction &MF = DAG.getMachineFunction();
1852   MachineFrameInfo &MFI = MF.getFrameInfo();
1853 
1854   unsigned Align = MFI.getObjectAlignment(FrameIdx);
1855   if (Align >= 4)
1856     return;
1857 
1858   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1859   FuncInfo->setHasNonRISpills();
1860 }
1861 
1862 /// Returns true if the address N can be represented by a base register plus
1863 /// a signed 16-bit displacement [r+imm], and if it is not better
1864 /// represented as reg+reg.  If Aligned is true, only accept displacements
1865 /// suitable for STD and friends, i.e. multiples of 4.
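/// For example, (add %X, 32) is selected as Base = %X, Disp = 32 (also valid
/// when Aligned is true), whereas (add %X, %Y) is rejected here in favor of
/// the reg+reg form.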
1866 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
1867                                             SDValue &Base,
1868                                             SelectionDAG &DAG,
1869                                             bool Aligned) const {
1870   // FIXME dl should come from parent load or store, not from address
1871   SDLoc dl(N);
1872   // If this can be more profitably realized as r+r, fail.
1873   if (SelectAddressRegReg(N, Disp, Base, DAG))
1874     return false;
1875 
1876   if (N.getOpcode() == ISD::ADD) {
1877     short imm = 0;
1878     if (isIntS16Immediate(N.getOperand(1), imm) &&
1879         (!Aligned || (imm & 3) == 0)) {
1880       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
1881       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1882         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1883         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1884       } else {
1885         Base = N.getOperand(0);
1886       }
1887       return true; // [r+i]
1888     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
1889       // Match LOAD (ADD (X, Lo(G))).
1890       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
1891              && "Cannot handle constant offsets yet!");
1892       Disp = N.getOperand(1).getOperand(0);  // The global address.
1893       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
1894              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
1895              Disp.getOpcode() == ISD::TargetConstantPool ||
1896              Disp.getOpcode() == ISD::TargetJumpTable);
1897       Base = N.getOperand(0);
1898       return true;  // [&g+r]
1899     }
1900   } else if (N.getOpcode() == ISD::OR) {
1901     short imm = 0;
1902     if (isIntS16Immediate(N.getOperand(1), imm) &&
1903         (!Aligned || (imm & 3) == 0)) {
1904       // If this is an or of disjoint bitfields, we can codegen this as an add
1905       // (for better address arithmetic) if the LHS and RHS of the OR are
1906       // provably disjoint.
1907       APInt LHSKnownZero, LHSKnownOne;
1908       DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
1909 
1910       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
1911         // If all of the bits are known zero on the LHS or RHS, the add won't
1912         // carry.
1913         if (FrameIndexSDNode *FI =
1914               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1915           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1916           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1917         } else {
1918           Base = N.getOperand(0);
1919         }
1920         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
1921         return true;
1922       }
1923     }
1924   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1925     // Loading from a constant address.
1926 
1927     // If this address fits entirely in a 16-bit sext immediate field, codegen
1928     // this as "d, 0"
1929     short Imm;
1930     if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
1931       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
1932       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1933                              CN->getValueType(0));
1934       return true;
1935     }
1936 
1937     // Handle 32-bit sext immediates with LIS + addr mode.
1938     if ((CN->getValueType(0) == MVT::i32 ||
1939          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
1940         (!Aligned || (CN->getZExtValue() & 3) == 0)) {
1941       int Addr = (int)CN->getZExtValue();
1942 
1943       // Otherwise, break this down into an LIS + disp.
1944       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
1945 
1946       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
1947                                    MVT::i32);
1948       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
1949       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
1950       return true;
1951     }
1952   }
1953 
1954   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
1955   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
1956     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1957     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1958   } else
1959     Base = N;
1960   return true;      // [r+0]
1961 }
1962 
1963 /// SelectAddressRegRegOnly - Given the specified address, force it to be
1964 /// represented as an indexed [r+r] operation.
1965 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
1966                                                 SDValue &Index,
1967                                                 SelectionDAG &DAG) const {
1968   // Check to see if we can easily represent this as an [r+r] address.  This
1969   // will fail if it thinks that the address is more profitably represented as
1970   // reg+imm, e.g. where imm = 0.
1971   if (SelectAddressRegReg(N, Base, Index, DAG))
1972     return true;
1973 
1974   // If the operand is an addition, always emit this as [r+r], since this is
1975   // better (for code size, and execution, as the memop does the add for free)
1976   // than emitting an explicit add.
1977   if (N.getOpcode() == ISD::ADD) {
1978     Base = N.getOperand(0);
1979     Index = N.getOperand(1);
1980     return true;
1981   }
1982 
1983   // Otherwise, do it the hard way, using R0 as the base register.
1984   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1985                          N.getValueType());
1986   Index = N;
1987   return true;
1988 }
1989 
1990 /// getPreIndexedAddressParts - Returns true if the node's address can be
1991 /// legally represented as a pre-indexed load/store address; if so, the base
1992 /// pointer, offset pointer and addressing mode are returned by reference.
1993 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1994                                                   SDValue &Offset,
1995                                                   ISD::MemIndexedMode &AM,
1996                                                   SelectionDAG &DAG) const {
1997   if (DisablePPCPreinc) return false;
1998 
1999   bool isLoad = true;
2000   SDValue Ptr;
2001   EVT VT;
2002   unsigned Alignment;
2003   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2004     Ptr = LD->getBasePtr();
2005     VT = LD->getMemoryVT();
2006     Alignment = LD->getAlignment();
2007   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2008     Ptr = ST->getBasePtr();
2009     VT  = ST->getMemoryVT();
2010     Alignment = ST->getAlignment();
2011     isLoad = false;
2012   } else
2013     return false;
2014 
2015   // PowerPC doesn't have preinc load/store instructions for vectors (except
2016   // for QPX, which does have preinc r+r forms).
2017   if (VT.isVector()) {
2018     if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2019       return false;
2020     } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2021       AM = ISD::PRE_INC;
2022       return true;
2023     }
2024   }
2025 
2026   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2027 
2028     // Common code will reject creating a pre-inc form if the base pointer
2029     // is a frame index, or if N is a store and the base pointer is either
2030     // the same as or a predecessor of the value being stored.  Check for
2031     // those situations here, and try with swapped Base/Offset instead.
2032     bool Swap = false;
2033 
2034     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2035       Swap = true;
2036     else if (!isLoad) {
2037       SDValue Val = cast<StoreSDNode>(N)->getValue();
2038       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2039         Swap = true;
2040     }
2041 
2042     if (Swap)
2043       std::swap(Base, Offset);
2044 
2045     AM = ISD::PRE_INC;
2046     return true;
2047   }
2048 
2049   // LDU/STU can only handle immediates that are a multiple of 4.
2050   if (VT != MVT::i64) {
2051     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
2052       return false;
2053   } else {
2054     // LDU/STU need an address with at least 4-byte alignment.
2055     if (Alignment < 4)
2056       return false;
2057 
2058     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
2059       return false;
2060   }
2061 
2062   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2063     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2064     // sext i32 to i64 when addr mode is r+i.
2065     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2066         LD->getExtensionType() == ISD::SEXTLOAD &&
2067         isa<ConstantSDNode>(Offset))
2068       return false;
2069   }
2070 
2071   AM = ISD::PRE_INC;
2072   return true;
2073 }
2074 
2075 //===----------------------------------------------------------------------===//
2076 //  LowerOperation implementation
2077 //===----------------------------------------------------------------------===//
2078 
2079 /// Set HiOpFlags and LoOpFlags to the target MO flags used when referencing
2080 /// labels, taking PIC mode and non-lazy-pointer requirements into account.
2081 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2082                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2083                                const GlobalValue *GV = nullptr) {
2084   HiOpFlags = PPCII::MO_HA;
2085   LoOpFlags = PPCII::MO_LO;
2086 
2087   // Add the PIC flags only when compiling with the PIC relocation model.
2088   if (IsPIC) {
2089     HiOpFlags |= PPCII::MO_PIC_FLAG;
2090     LoOpFlags |= PPCII::MO_PIC_FLAG;
2091   }
2092 
2093   // If this is a reference to a global value that requires a non-lazy-ptr, make
2094   // sure that instruction lowering adds it.
2095   if (GV && Subtarget.hasLazyResolverStub(GV)) {
2096     HiOpFlags |= PPCII::MO_NLP_FLAG;
2097     LoOpFlags |= PPCII::MO_NLP_FLAG;
2098 
2099     if (GV->hasHiddenVisibility()) {
2100       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2101       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2102     }
2103   }
2104 }
2105 
2106 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2107                              SelectionDAG &DAG) {
2108   SDLoc DL(HiPart);
2109   EVT PtrVT = HiPart.getValueType();
2110   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2111 
2112   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2113   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2114 
2115   // With PIC, the first instruction is actually "GR+hi(&G)".
2116   if (isPIC)
2117     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2118                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2119 
2120   // Generate non-pic code that has direct accesses to the constant pool.
2121   // The address of the global is just (hi(&g)+lo(&g)).
2122   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2123 }
2124 
2125 static void setUsesTOCBasePtr(MachineFunction &MF) {
2126   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2127   FuncInfo->setUsesTOCBasePtr();
2128 }
2129 
2130 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2131   setUsesTOCBasePtr(DAG.getMachineFunction());
2132 }
2133 
2134 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2135                            SDValue GA) {
2136   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2137   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2138                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2139 
2140   SDValue Ops[] = { GA, Reg };
2141   return DAG.getMemIntrinsicNode(
2142       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2143       MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
2144       false, 0);
2145 }
2146 
2147 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2148                                              SelectionDAG &DAG) const {
2149   EVT PtrVT = Op.getValueType();
2150   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2151   const Constant *C = CP->getConstVal();
2152 
2153   // 64-bit SVR4 ABI code is always position-independent.
2154   // The actual address of the GlobalValue is stored in the TOC.
2155   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2156     setUsesTOCBasePtr(DAG);
2157     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2158     return getTOCEntry(DAG, SDLoc(CP), true, GA);
2159   }
2160 
2161   unsigned MOHiFlag, MOLoFlag;
2162   bool IsPIC = isPositionIndependent();
2163   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2164 
2165   if (IsPIC && Subtarget.isSVR4ABI()) {
2166     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2167                                            PPCII::MO_PIC_FLAG);
2168     return getTOCEntry(DAG, SDLoc(CP), false, GA);
2169   }
2170 
2171   SDValue CPIHi =
2172     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2173   SDValue CPILo =
2174     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2175   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2176 }
2177 
2178 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2179   EVT PtrVT = Op.getValueType();
2180   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2181 
2182   // 64-bit SVR4 ABI code is always position-independent.
2183   // The actual address of the GlobalValue is stored in the TOC.
2184   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2185     setUsesTOCBasePtr(DAG);
2186     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2187     return getTOCEntry(DAG, SDLoc(JT), true, GA);
2188   }
2189 
2190   unsigned MOHiFlag, MOLoFlag;
2191   bool IsPIC = isPositionIndependent();
2192   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2193 
2194   if (IsPIC && Subtarget.isSVR4ABI()) {
2195     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2196                                         PPCII::MO_PIC_FLAG);
2197     return getTOCEntry(DAG, SDLoc(GA), false, GA);
2198   }
2199 
2200   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2201   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2202   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2203 }
2204 
2205 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2206                                              SelectionDAG &DAG) const {
2207   EVT PtrVT = Op.getValueType();
2208   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2209   const BlockAddress *BA = BASDN->getBlockAddress();
2210 
2211   // 64-bit SVR4 ABI code is always position-independent.
2212   // The actual BlockAddress is stored in the TOC.
2213   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2214     setUsesTOCBasePtr(DAG);
2215     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2216     return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2217   }
2218 
2219   unsigned MOHiFlag, MOLoFlag;
2220   bool IsPIC = isPositionIndependent();
2221   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2222   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2223   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2224   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2225 }
2226 
2227 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2228                                               SelectionDAG &DAG) const {
2229 
2230   // FIXME: TLS addresses currently use medium model code sequences,
2231   // which is the most useful form.  Eventually support for small and
2232   // large models could be added if users need it, at the cost of
2233   // additional complexity.
2234   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2235   if (DAG.getTarget().Options.EmulatedTLS)
2236     return LowerToTLSEmulatedModel(GA, DAG);
2237 
2238   SDLoc dl(GA);
2239   const GlobalValue *GV = GA->getGlobal();
2240   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2241   bool is64bit = Subtarget.isPPC64();
2242   const Module *M = DAG.getMachineFunction().getFunction()->getParent();
2243   PICLevel::Level picLevel = M->getPICLevel();
2244 
2245   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2246 
2247   if (Model == TLSModel::LocalExec) {
2248     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2249                                                PPCII::MO_TPREL_HA);
2250     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2251                                                PPCII::MO_TPREL_LO);
2252     SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
2253                                      is64bit ? MVT::i64 : MVT::i32);
2254     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2255     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2256   }
2257 
2258   if (Model == TLSModel::InitialExec) {
2259     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2260     SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2261                                                 PPCII::MO_TLS);
2262     SDValue GOTPtr;
2263     if (is64bit) {
2264       setUsesTOCBasePtr(DAG);
2265       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2266       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2267                            PtrVT, GOTReg, TGA);
2268     } else
2269       GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2270     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2271                                    PtrVT, TGA, GOTPtr);
2272     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2273   }
2274 
2275   if (Model == TLSModel::GeneralDynamic) {
2276     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2277     SDValue GOTPtr;
2278     if (is64bit) {
2279       setUsesTOCBasePtr(DAG);
2280       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2281       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2282                                    GOTReg, TGA);
2283     } else {
2284       if (picLevel == PICLevel::SmallPIC)
2285         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2286       else
2287         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2288     }
2289     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2290                        GOTPtr, TGA, TGA);
2291   }
2292 
2293   if (Model == TLSModel::LocalDynamic) {
2294     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2295     SDValue GOTPtr;
2296     if (is64bit) {
2297       setUsesTOCBasePtr(DAG);
2298       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2299       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2300                            GOTReg, TGA);
2301     } else {
2302       if (picLevel == PICLevel::SmallPIC)
2303         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2304       else
2305         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2306     }
2307     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2308                                   PtrVT, GOTPtr, TGA, TGA);
2309     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2310                                       PtrVT, TLSAddr, TGA);
2311     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2312   }
2313 
2314   llvm_unreachable("Unknown TLS model!");
2315 }
2316 
2317 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2318                                               SelectionDAG &DAG) const {
2319   EVT PtrVT = Op.getValueType();
2320   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2321   SDLoc DL(GSDN);
2322   const GlobalValue *GV = GSDN->getGlobal();
2323 
2324   // 64-bit SVR4 ABI code is always position-independent.
2325   // The actual address of the GlobalValue is stored in the TOC.
2326   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2327     setUsesTOCBasePtr(DAG);
2328     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2329     return getTOCEntry(DAG, DL, true, GA);
2330   }
2331 
2332   unsigned MOHiFlag, MOLoFlag;
2333   bool IsPIC = isPositionIndependent();
2334   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2335 
2336   if (IsPIC && Subtarget.isSVR4ABI()) {
2337     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2338                                             GSDN->getOffset(),
2339                                             PPCII::MO_PIC_FLAG);
2340     return getTOCEntry(DAG, DL, false, GA);
2341   }
2342 
2343   SDValue GAHi =
2344     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2345   SDValue GALo =
2346     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2347 
2348   SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2349 
2350   // If the global reference is actually to a non-lazy-pointer, we have to do an
2351   // extra load to get the address of the global.
2352   if (MOHiFlag & PPCII::MO_NLP_FLAG)
2353     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2354   return Ptr;
2355 }
2356 
2357 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2358   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2359   SDLoc dl(Op);
2360 
2361   if (Op.getValueType() == MVT::v2i64) {
2362     // When the operands themselves are v2i64 values, we need to do something
2363     // special because VSX has no underlying comparison operations for these.
2364     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2365       // Equality can be handled by casting to the legal type for Altivec
2366       // comparisons, everything else needs to be expanded.
2367       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2368         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2369                  DAG.getSetCC(dl, MVT::v4i32,
2370                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2371                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2372                    CC));
2373       }
2374 
2375       return SDValue();
2376     }
2377 
2378     // We handle most of these in the usual way.
2379     return Op;
2380   }
2381 
2382   // If we're comparing for equality to zero, expose the fact that this is
2383   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2384   // fold the new nodes.
2385   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2386     return V;
2387 
2388   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2389     // Leave comparisons against 0 and -1 alone for now, since they're usually
2390     // optimized.  FIXME: revisit this when we can custom lower all setcc
2391     // optimizations.
2392     if (C->isAllOnesValue() || C->isNullValue())
2393       return SDValue();
2394   }
2395 
2396   // If we have an integer seteq/setne, turn it into a compare against zero
2397   // by xor'ing the rhs with the lhs, which is faster than setting a
2398   // condition register, reading it back out, and masking the correct bit.  The
2399   // normal approach here uses sub to do this instead of xor.  Using xor exposes
2400   // the result to other bit-twiddling opportunities.
2401   EVT LHSVT = Op.getOperand(0).getValueType();
2402   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2403     EVT VT = Op.getValueType();
2404     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2405                                 Op.getOperand(1));
2406     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2407   }
2408   return SDValue();
2409 }
2410 
2411 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2412   SDNode *Node = Op.getNode();
2413   EVT VT = Node->getValueType(0);
2414   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2415   SDValue InChain = Node->getOperand(0);
2416   SDValue VAListPtr = Node->getOperand(1);
2417   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2418   SDLoc dl(Node);
2419 
2420   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2421 
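  // The loads and stores below assume the 32-bit SVR4 va_list layout:
  //   { i8 gpr, i8 fpr, <2 x i8> padding, i8* overflow_arg_area,
  //     i8* reg_save_area }
  // so the gpr/fpr indices live at byte offsets 0 and 1, the overflow area
  // pointer at offset 4, and the register save area pointer at offset 8.
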
2422   // gpr_index
2423   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2424                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
2425   InChain = GprIndex.getValue(1);
2426 
2427   if (VT == MVT::i64) {
2428     // Check if GprIndex is even
2429     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2430                                  DAG.getConstant(1, dl, MVT::i32));
2431     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2432                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2433     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2434                                           DAG.getConstant(1, dl, MVT::i32));
2435     // Align GprIndex to be even if it isn't
2436     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2437                            GprIndex);
2438   }
2439 
2440   // fpr index is 1 byte after gpr
2441   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2442                                DAG.getConstant(1, dl, MVT::i32));
2443 
2444   // fpr
2445   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2446                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
2447   InChain = FprIndex.getValue(1);
2448 
2449   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2450                                        DAG.getConstant(8, dl, MVT::i32));
2451 
2452   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2453                                         DAG.getConstant(4, dl, MVT::i32));
2454 
2455   // areas
2456   SDValue OverflowArea =
2457       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
2458   InChain = OverflowArea.getValue(1);
2459 
2460   SDValue RegSaveArea =
2461       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
2462   InChain = RegSaveArea.getValue(1);
2463 
2464   // select overflow_area if index >= 8
2465   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2466                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2467 
2468   // adjustment constant gpr_index * 4/8
2469   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2470                                     VT.isInteger() ? GprIndex : FprIndex,
2471                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2472                                                     MVT::i32));
2473 
2474   // OurReg = RegSaveArea + RegConstant
2475   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2476                                RegConstant);
2477 
2478   // Floating types are 32 bytes into RegSaveArea
2479   if (VT.isFloatingPoint())
2480     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2481                          DAG.getConstant(32, dl, MVT::i32));
2482 
2483   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2484   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2485                                    VT.isInteger() ? GprIndex : FprIndex,
2486                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2487                                                    MVT::i32));
2488 
2489   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2490                               VT.isInteger() ? VAListPtr : FprPtr,
2491                               MachinePointerInfo(SV), MVT::i8);
2492 
2493   // determine if we should load from reg_save_area or overflow_area
2494   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2495 
  // If the argument was taken from the overflow area (index >= 8), advance
  // overflow_area by 4/8.
2497   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2498                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
2499                                           dl, MVT::i32));
2500 
2501   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2502                              OverflowAreaPlusN);
2503 
2504   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
2505                               MachinePointerInfo(), MVT::i32);
2506 
2507   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
2508 }
2509 
2510 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2511   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2512 
  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes.
2515   return DAG.getMemcpy(Op.getOperand(0), Op,
2516                        Op.getOperand(1), Op.getOperand(2),
2517                        DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2518                        false, MachinePointerInfo(), MachinePointerInfo());
2519 }
2520 
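// adjust_trampoline is a no-op on PPC: the trampoline set up by
// INIT_TRAMPOLINE is directly callable, so return its address unchanged.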
2521 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2522                                                   SelectionDAG &DAG) const {
2523   return Op.getOperand(0);
2524 }
2525 
2526 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2527                                                 SelectionDAG &DAG) const {
2528   SDValue Chain = Op.getOperand(0);
2529   SDValue Trmp = Op.getOperand(1); // trampoline
2530   SDValue FPtr = Op.getOperand(2); // nested function
2531   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2532   SDLoc dl(Op);
2533 
2534   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2535   bool isPPC64 = (PtrVT == MVT::i64);
2536   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2537 
2538   TargetLowering::ArgListTy Args;
2539   TargetLowering::ArgListEntry Entry;
2540 
2541   Entry.Ty = IntPtrTy;
2542   Entry.Node = Trmp; Args.push_back(Entry);
2543 
2544   // TrampSize == (isPPC64 ? 48 : 40);
2545   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2546                                isPPC64 ? MVT::i64 : MVT::i32);
2547   Args.push_back(Entry);
2548 
2549   Entry.Node = FPtr; Args.push_back(Entry);
2550   Entry.Node = Nest; Args.push_back(Entry);
2551 
2552   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2553   TargetLowering::CallLoweringInfo CLI(DAG);
2554   CLI.setDebugLoc(dl).setChain(Chain)
2555     .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2556                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
2557                std::move(Args));
2558 
2559   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
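  // __trampoline_setup returns void, so only the chain of the call matters.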
2560   return CallResult.second;
2561 }
2562 
2563 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2564   MachineFunction &MF = DAG.getMachineFunction();
2565   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2566   EVT PtrVT = getPointerTy(MF.getDataLayout());
2567 
2568   SDLoc dl(Op);
2569 
2570   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2571     // vastart just stores the address of the VarArgsFrameIndex slot into the
2572     // memory location argument.
2573     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2574     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2575     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2576                         MachinePointerInfo(SV));
2577   }
2578 
2579   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2580   // We suppose the given va_list is already allocated.
2581   //
2582   // typedef struct {
2583   //  char gpr;     /* index into the array of 8 GPRs
2584   //                 * stored in the register save area
2585   //                 * gpr=0 corresponds to r3,
2586   //                 * gpr=1 to r4, etc.
2587   //                 */
2588   //  char fpr;     /* index into the array of 8 FPRs
2589   //                 * stored in the register save area
2590   //                 * fpr=0 corresponds to f1,
2591   //                 * fpr=1 to f2, etc.
2592   //                 */
2593   //  char *overflow_arg_area;
2594   //                /* location on stack that holds
2595   //                 * the next overflow argument
2596   //                 */
2597   //  char *reg_save_area;
2598   //               /* where r3:r10 and f1:f8 (if saved)
2599   //                * are stored
2600   //                */
2601   // } va_list[1];
2602 
2603   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2604   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2605   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2606                                             PtrVT);
2607   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2608                                  PtrVT);
2609 
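  // The constants below are the byte distances between successive va_list
  // fields: 1 from the gpr byte to the fpr byte, pointer-size - 1 (3) from the
  // fpr byte to overflow_arg_area, and pointer-size (4) from overflow_arg_area
  // to reg_save_area.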
2610   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2611   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2612 
2613   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2614   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2615 
2616   uint64_t FPROffset = 1;
2617   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
2618 
2619   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2620 
  // Store first byte: number of int regs
2622   SDValue firstStore =
2623       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
2624                         MachinePointerInfo(SV), MVT::i8);
2625   uint64_t nextOffset = FPROffset;
2626   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2627                                   ConstFPROffset);
2628 
  // Store second byte: number of float regs
2630   SDValue secondStore =
2631       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2632                         MachinePointerInfo(SV, nextOffset), MVT::i8);
2633   nextOffset += StackOffset;
2634   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2635 
  // Store second word: arguments given on stack
2637   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2638                                     MachinePointerInfo(SV, nextOffset));
2639   nextOffset += FrameOffset;
2640   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2641 
  // Store third word: arguments given in registers
2643   return DAG.getStore(thirdStore, dl, FR, nextPtr,
2644                       MachinePointerInfo(SV, nextOffset));
2645 }
2646 
2647 #include "PPCGenCallingConv.inc"
2648 
2649 // Function whose sole purpose is to kill compiler warnings
2650 // stemming from unused functions included from PPCGenCallingConv.inc.
2651 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2652   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2653 }
2654 
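// Custom routine referenced from the generated calling-convention tables;
// returning true reports the value as handled without allocating a register
// or stack slot for it here.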
2655 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2656                                       CCValAssign::LocInfo &LocInfo,
2657                                       ISD::ArgFlagsTy &ArgFlags,
2658                                       CCState &State) {
2659   return true;
2660 }
2661 
2662 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2663                                              MVT &LocVT,
2664                                              CCValAssign::LocInfo &LocInfo,
2665                                              ISD::ArgFlagsTy &ArgFlags,
2666                                              CCState &State) {
2667   static const MCPhysReg ArgRegs[] = {
2668     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2669     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2670   };
2671   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2672 
2673   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2674 
2675   // Skip one register if the first unallocated register has an even register
2676   // number and there are still argument registers available which have not been
2677   // allocated yet. RegNum is actually an index into ArgRegs, which means we
2678   // need to skip a register if RegNum is odd.
2679   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2680     State.AllocateReg(ArgRegs[RegNum]);
2681   }
2682 
2683   // Always return false here, as this function only makes sure that the first
2684   // unallocated register has an odd register number and does not actually
2685   // allocate a register for the current argument.
2686   return false;
2687 }
2688 
2689 bool
2690 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
2691                                                   MVT &LocVT,
2692                                                   CCValAssign::LocInfo &LocInfo,
2693                                                   ISD::ArgFlagsTy &ArgFlags,
2694                                                   CCState &State) {
2695   static const MCPhysReg ArgRegs[] = {
2696     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2697     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2698   };
2699   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2700 
2701   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2702   int RegsLeft = NumArgRegs - RegNum;
2703 
  // Skip if there are not enough registers left for the long double type (4
  // GPRs in soft-float mode) and put the long double argument on the stack.
2706   if (RegNum != NumArgRegs && RegsLeft < 4) {
2707     for (int i = 0; i < RegsLeft; i++) {
2708       State.AllocateReg(ArgRegs[RegNum + i]);
2709     }
2710   }
2711 
2712   return false;
2713 }
2714 
2715 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
2716                                                MVT &LocVT,
2717                                                CCValAssign::LocInfo &LocInfo,
2718                                                ISD::ArgFlagsTy &ArgFlags,
2719                                                CCState &State) {
2720   static const MCPhysReg ArgRegs[] = {
2721     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2722     PPC::F8
2723   };
2724 
2725   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2726 
2727   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2728 
  // If there is only one floating-point register left, we need to put both f64
  // values of a split ppc_fp128 value on the stack.
2731   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
2732     State.AllocateReg(ArgRegs[RegNum]);
2733   }
2734 
2735   // Always return false here, as this function only makes sure that the two f64
2736   // values a ppc_fp128 value is split into are both passed in registers or both
2737   // passed on the stack and does not actually allocate a register for the
2738   // current argument.
2739   return false;
2740 }
2741 
2742 /// FPR - The set of FP registers that should be allocated for arguments,
2743 /// on Darwin.
2744 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
2745                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
2746                                 PPC::F11, PPC::F12, PPC::F13};
2747 
2748 /// QFPR - The set of QPX registers that should be allocated for arguments.
2749 static const MCPhysReg QFPR[] = {
2750     PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
2751     PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
2752 
2753 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
2754 /// the stack.
2755 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
2756                                        unsigned PtrByteSize) {
2757   unsigned ArgSize = ArgVT.getStoreSize();
2758   if (Flags.isByVal())
2759     ArgSize = Flags.getByValSize();
2760 
2761   // Round up to multiples of the pointer size, except for array members,
2762   // which are always packed.
2763   if (!Flags.isInConsecutiveRegs())
2764     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2765 
2766   return ArgSize;
2767 }
2768 
2769 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
2770 /// on the stack.
2771 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
2772                                             ISD::ArgFlagsTy Flags,
2773                                             unsigned PtrByteSize) {
2774   unsigned Align = PtrByteSize;
2775 
2776   // Altivec parameters are padded to a 16 byte boundary.
2777   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2778       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2779       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2780       ArgVT == MVT::v1i128)
2781     Align = 16;
2782   // QPX vector types stored in double-precision are padded to a 32 byte
2783   // boundary.
2784   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
2785     Align = 32;
2786 
2787   // ByVal parameters are aligned as requested.
2788   if (Flags.isByVal()) {
2789     unsigned BVAlign = Flags.getByValAlign();
2790     if (BVAlign > PtrByteSize) {
2791       if (BVAlign % PtrByteSize != 0)
        llvm_unreachable(
2793             "ByVal alignment is not a multiple of the pointer size");
2794 
2795       Align = BVAlign;
2796     }
2797   }
2798 
2799   // Array members are always packed to their original alignment.
2800   if (Flags.isInConsecutiveRegs()) {
2801     // If the array member was split into multiple registers, the first
2802     // needs to be aligned to the size of the full type.  (Except for
2803     // ppcf128, which is only aligned as its f64 components.)
2804     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
2805       Align = OrigVT.getStoreSize();
2806     else
2807       Align = ArgVT.getStoreSize();
2808   }
2809 
2810   return Align;
2811 }
2812 
2813 /// CalculateStackSlotUsed - Return whether this argument will use its
2814 /// stack slot (instead of being passed in registers).  ArgOffset,
2815 /// AvailableFPRs, and AvailableVRs must hold the current argument
2816 /// position, and will be updated to account for this argument.
2817 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
2818                                    ISD::ArgFlagsTy Flags,
2819                                    unsigned PtrByteSize,
2820                                    unsigned LinkageSize,
2821                                    unsigned ParamAreaSize,
2822                                    unsigned &ArgOffset,
2823                                    unsigned &AvailableFPRs,
2824                                    unsigned &AvailableVRs, bool HasQPX) {
2825   bool UseMemory = false;
2826 
2827   // Respect alignment of argument on the stack.
2828   unsigned Align =
2829     CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
2830   ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2831   // If there's no space left in the argument save area, we must
2832   // use memory (this check also catches zero-sized arguments).
2833   if (ArgOffset >= LinkageSize + ParamAreaSize)
2834     UseMemory = true;
2835 
2836   // Allocate argument on the stack.
2837   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2838   if (Flags.isInConsecutiveRegsLast())
2839     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2840   // If we overran the argument save area, we must use memory
2841   // (this check catches arguments passed partially in memory)
2842   if (ArgOffset > LinkageSize + ParamAreaSize)
2843     UseMemory = true;
2844 
2845   // However, if the argument is actually passed in an FPR or a VR,
2846   // we don't use memory after all.
2847   if (!Flags.isByVal()) {
2848     if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
2849         // QPX registers overlap with the scalar FP registers.
2850         (HasQPX && (ArgVT == MVT::v4f32 ||
2851                     ArgVT == MVT::v4f64 ||
2852                     ArgVT == MVT::v4i1)))
2853       if (AvailableFPRs > 0) {
2854         --AvailableFPRs;
2855         return false;
2856       }
2857     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2858         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2859         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2860         ArgVT == MVT::v1i128)
2861       if (AvailableVRs > 0) {
2862         --AvailableVRs;
2863         return false;
2864       }
2865   }
2866 
2867   return UseMemory;
2868 }
2869 
2870 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
2871 /// ensure minimum alignment required for target.
2872 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
2873                                      unsigned NumBytes) {
2874   unsigned TargetAlign = Lowering->getStackAlignment();
2875   unsigned AlignMask = TargetAlign - 1;
2876   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2877   return NumBytes;
2878 }
2879 
2880 SDValue PPCTargetLowering::LowerFormalArguments(
2881     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2882     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2883     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2884   if (Subtarget.isSVR4ABI()) {
2885     if (Subtarget.isPPC64())
2886       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
2887                                          dl, DAG, InVals);
2888     else
2889       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
2890                                          dl, DAG, InVals);
2891   } else {
2892     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
2893                                        dl, DAG, InVals);
2894   }
2895 }
2896 
2897 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
2898     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2899     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2900     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2901 
2902   // 32-bit SVR4 ABI Stack Frame Layout:
2903   //              +-----------------------------------+
2904   //        +-->  |            Back chain             |
2905   //        |     +-----------------------------------+
2906   //        |     | Floating-point register save area |
2907   //        |     +-----------------------------------+
2908   //        |     |    General register save area     |
2909   //        |     +-----------------------------------+
2910   //        |     |          CR save word             |
2911   //        |     +-----------------------------------+
2912   //        |     |         VRSAVE save word          |
2913   //        |     +-----------------------------------+
2914   //        |     |         Alignment padding         |
2915   //        |     +-----------------------------------+
2916   //        |     |     Vector register save area     |
2917   //        |     +-----------------------------------+
2918   //        |     |       Local variable space        |
2919   //        |     +-----------------------------------+
2920   //        |     |        Parameter list area        |
2921   //        |     +-----------------------------------+
2922   //        |     |           LR save word            |
2923   //        |     +-----------------------------------+
2924   // SP-->  +---  |            Back chain             |
2925   //              +-----------------------------------+
2926   //
2927   // Specifications:
2928   //   System V Application Binary Interface PowerPC Processor Supplement
2929   //   AltiVec Technology Programming Interface Manual
2930 
2931   MachineFunction &MF = DAG.getMachineFunction();
2932   MachineFrameInfo &MFI = MF.getFrameInfo();
2933   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2934 
2935   EVT PtrVT = getPointerTy(MF.getDataLayout());
2936   // Potential tail calls could cause overwriting of argument stack slots.
2937   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2938                        (CallConv == CallingConv::Fast));
2939   unsigned PtrByteSize = 4;
2940 
2941   // Assign locations to all of the incoming arguments.
2942   SmallVector<CCValAssign, 16> ArgLocs;
2943   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2944                  *DAG.getContext());
2945 
2946   // Reserve space for the linkage area on the stack.
2947   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
2948   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
2949   if (useSoftFloat())
2950     CCInfo.PreAnalyzeFormalArguments(Ins);
2951 
2952   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
2953   CCInfo.clearWasPPCF128();
2954 
2955   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2956     CCValAssign &VA = ArgLocs[i];
2957 
2958     // Arguments stored in registers.
2959     if (VA.isRegLoc()) {
2960       const TargetRegisterClass *RC;
2961       EVT ValVT = VA.getValVT();
2962 
2963       switch (ValVT.getSimpleVT().SimpleTy) {
2964         default:
2965           llvm_unreachable("ValVT not supported by formal arguments Lowering");
2966         case MVT::i1:
2967         case MVT::i32:
2968           RC = &PPC::GPRCRegClass;
2969           break;
2970         case MVT::f32:
2971           if (Subtarget.hasP8Vector())
2972             RC = &PPC::VSSRCRegClass;
2973           else
2974             RC = &PPC::F4RCRegClass;
2975           break;
2976         case MVT::f64:
2977           if (Subtarget.hasVSX())
2978             RC = &PPC::VSFRCRegClass;
2979           else
2980             RC = &PPC::F8RCRegClass;
2981           break;
2982         case MVT::v16i8:
2983         case MVT::v8i16:
2984         case MVT::v4i32:
2985           RC = &PPC::VRRCRegClass;
2986           break;
2987         case MVT::v4f32:
2988           RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
2989           break;
2990         case MVT::v2f64:
2991         case MVT::v2i64:
2992           RC = &PPC::VRRCRegClass;
2993           break;
2994         case MVT::v4f64:
2995           RC = &PPC::QFRCRegClass;
2996           break;
2997         case MVT::v4i1:
2998           RC = &PPC::QBRCRegClass;
2999           break;
3000       }
3001 
3002       // Transform the arguments stored in physical registers into virtual ones.
3003       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3004       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3005                                             ValVT == MVT::i1 ? MVT::i32 : ValVT);
3006 
3007       if (ValVT == MVT::i1)
3008         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3009 
3010       InVals.push_back(ArgValue);
3011     } else {
3012       // Argument stored in memory.
3013       assert(VA.isMemLoc());
3014 
3015       unsigned ArgSize = VA.getLocVT().getStoreSize();
3016       int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
3017                                      isImmutable);
3018 
3019       // Create load nodes to retrieve arguments from the stack.
3020       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3021       InVals.push_back(
3022           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3023     }
3024   }
3025 
3026   // Assign locations to all of the incoming aggregate by value arguments.
3027   // Aggregates passed by value are stored in the local variable space of the
3028   // caller's stack frame, right above the parameter list area.
3029   SmallVector<CCValAssign, 16> ByValArgLocs;
3030   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3031                       ByValArgLocs, *DAG.getContext());
3032 
3033   // Reserve stack space for the allocations in CCInfo.
3034   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3035 
3036   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3037 
3038   // Area that is at least reserved in the caller of this function.
3039   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3040   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3041 
  // Set the size that is at least reserved in the caller of this function.
  // Tail call optimized functions' reserved stack space needs to be aligned so
  // that
3044   // taking the difference between two stack areas will result in an aligned
3045   // stack.
3046   MinReservedArea =
3047       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3048   FuncInfo->setMinReservedArea(MinReservedArea);
3049 
3050   SmallVector<SDValue, 8> MemOps;
3051 
  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
3054   if (isVarArg) {
3055     static const MCPhysReg GPArgRegs[] = {
3056       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3057       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3058     };
3059     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3060 
3061     static const MCPhysReg FPArgRegs[] = {
3062       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3063       PPC::F8
3064     };
3065     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3066 
3067     if (useSoftFloat())
3068        NumFPArgRegs = 0;
3069 
3070     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3071     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3072 
3073     // Make room for NumGPArgRegs and NumFPArgRegs.
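    // With all registers in play this is 8 GPRs * 4 bytes + 8 FPRs * 8 bytes
    // = 96 bytes.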
3074     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3075                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3076 
3077     FuncInfo->setVarArgsStackOffset(
3078       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3079                             CCInfo.getNextStackOffset(), true));
3080 
3081     FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3082     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3083 
3084     // The fixed integer arguments of a variadic function are stored to the
3085     // VarArgsFrameIndex on the stack so that they may be loaded by
3086     // dereferencing the result of va_next.
3087     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3088       // Get an existing live-in vreg, or add a new one.
3089       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3090       if (!VReg)
3091         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3092 
3093       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3094       SDValue Store =
3095           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3096       MemOps.push_back(Store);
3097       // Increment the address by four for the next argument to store
3098       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3099       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3100     }
3101 
3102     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3103     // is set.
3104     // The double arguments are stored to the VarArgsFrameIndex
3105     // on the stack.
3106     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3107       // Get an existing live-in vreg, or add a new one.
3108       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3109       if (!VReg)
3110         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3111 
3112       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3113       SDValue Store =
3114           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3115       MemOps.push_back(Store);
3116       // Increment the address by eight for the next argument to store
3117       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3118                                          PtrVT);
3119       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3120     }
3121   }
3122 
3123   if (!MemOps.empty())
3124     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3125 
3126   return Chain;
3127 }
3128 
3129 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3130 // value to MVT::i64 and then truncate to the correct register size.
3131 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3132                                              EVT ObjectVT, SelectionDAG &DAG,
3133                                              SDValue ArgVal,
3134                                              const SDLoc &dl) const {
3135   if (Flags.isSExt())
3136     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3137                          DAG.getValueType(ObjectVT));
3138   else if (Flags.isZExt())
3139     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3140                          DAG.getValueType(ObjectVT));
3141 
3142   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3143 }
3144 
3145 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3146     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3147     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3148     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3149   // TODO: add description of PPC stack frame format, or at least some docs.
3150   //
3151   bool isELFv2ABI = Subtarget.isELFv2ABI();
3152   bool isLittleEndian = Subtarget.isLittleEndian();
3153   MachineFunction &MF = DAG.getMachineFunction();
3154   MachineFrameInfo &MFI = MF.getFrameInfo();
3155   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3156 
3157   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3158          "fastcc not supported on varargs functions");
3159 
3160   EVT PtrVT = getPointerTy(MF.getDataLayout());
3161   // Potential tail calls could cause overwriting of argument stack slots.
3162   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3163                        (CallConv == CallingConv::Fast));
3164   unsigned PtrByteSize = 8;
3165   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3166 
3167   static const MCPhysReg GPR[] = {
3168     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3169     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3170   };
3171   static const MCPhysReg VR[] = {
3172     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3173     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3174   };
3175 
3176   const unsigned Num_GPR_Regs = array_lengthof(GPR);
3177   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3178   const unsigned Num_VR_Regs  = array_lengthof(VR);
3179   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3180 
3181   // Do a first pass over the arguments to determine whether the ABI
3182   // guarantees that our caller has allocated the parameter save area
3183   // on its stack frame.  In the ELFv1 ABI, this is always the case;
3184   // in the ELFv2 ABI, it is true if this is a vararg function or if
3185   // any parameter is located in a stack slot.
3186 
3187   bool HasParameterArea = !isELFv2ABI || isVarArg;
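  // ParamAreaSize is the 8-doubleword (64-byte) region that maps to the GPR
  // argument registers.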
3188   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3189   unsigned NumBytes = LinkageSize;
3190   unsigned AvailableFPRs = Num_FPR_Regs;
3191   unsigned AvailableVRs = Num_VR_Regs;
3192   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3193     if (Ins[i].Flags.isNest())
3194       continue;
3195 
3196     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3197                                PtrByteSize, LinkageSize, ParamAreaSize,
3198                                NumBytes, AvailableFPRs, AvailableVRs,
3199                                Subtarget.hasQPX()))
3200       HasParameterArea = true;
3201   }
3202 
3203   // Add DAG nodes to load the arguments or copy them out of registers.  On
3204   // entry to a function on PPC, the arguments start after the linkage area,
3205   // although the first ones are often in registers.
3206 
3207   unsigned ArgOffset = LinkageSize;
3208   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
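  // QPX registers overlap the scalar FPRs, so the QFPR index aliases FPR_idx.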
3209   unsigned &QFPR_idx = FPR_idx;
3210   SmallVector<SDValue, 8> MemOps;
3211   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3212   unsigned CurArgIdx = 0;
3213   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3214     SDValue ArgVal;
3215     bool needsLoad = false;
3216     EVT ObjectVT = Ins[ArgNo].VT;
3217     EVT OrigVT = Ins[ArgNo].ArgVT;
3218     unsigned ObjSize = ObjectVT.getStoreSize();
3219     unsigned ArgSize = ObjSize;
3220     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3221     if (Ins[ArgNo].isOrigArg()) {
3222       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3223       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3224     }
    // We re-align the argument offset for each argument, except under the fast
    // calling convention, where we re-align only when the argument will
    // actually use a stack slot.
3228     unsigned CurArgOffset, Align;
3229     auto ComputeArgOffset = [&]() {
3230       /* Respect alignment of argument on the stack.  */
3231       Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3232       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3233       CurArgOffset = ArgOffset;
3234     };
3235 
3236     if (CallConv != CallingConv::Fast) {
3237       ComputeArgOffset();
3238 
3239       /* Compute GPR index associated with argument offset.  */
3240       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3241       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3242     }
3243 
3244     // FIXME the codegen can be much improved in some cases.
3245     // We do not have to keep everything in memory.
3246     if (Flags.isByVal()) {
3247       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3248 
3249       if (CallConv == CallingConv::Fast)
3250         ComputeArgOffset();
3251 
      // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple
      // of the register size.
3253       ObjSize = Flags.getByValSize();
3254       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3255       // Empty aggregate parameters do not take up registers.  Examples:
3256       //   struct { } a;
3257       //   union  { } b;
3258       //   int c[0];
3259       // etc.  However, we have to provide a place-holder in InVals, so
3260       // pretend we have an 8-byte item at the current address for that
3261       // purpose.
3262       if (!ObjSize) {
3263         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3264         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3265         InVals.push_back(FIN);
3266         continue;
3267       }
3268 
3269       // Create a stack object covering all stack doublewords occupied
3270       // by the argument.  If the argument is (fully or partially) on
3271       // the stack, or if the argument is fully in registers but the
3272       // caller has allocated the parameter save anyway, we can refer
3273       // directly to the caller's stack frame.  Otherwise, create a
3274       // local copy in our own frame.
3275       int FI;
3276       if (HasParameterArea ||
3277           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3278         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3279       else
3280         FI = MFI.CreateStackObject(ArgSize, Align, false);
3281       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3282 
3283       // Handle aggregates smaller than 8 bytes.
3284       if (ObjSize < PtrByteSize) {
3285         // The value of the object is its address, which differs from the
3286         // address of the enclosing doubleword on big-endian systems.
3287         SDValue Arg = FIN;
3288         if (!isLittleEndian) {
3289           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3290           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3291         }
3292         InVals.push_back(Arg);
3293 
3294         if (GPR_idx != Num_GPR_Regs) {
3295           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3296           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3297           SDValue Store;
3298 
3299           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3300             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3301                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
3302             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3303                                       MachinePointerInfo(&*FuncArg), ObjType);
3304           } else {
3305             // For sizes that don't fit a truncating store (3, 5, 6, 7),
3306             // store the whole register as-is to the parameter save area
3307             // slot.
3308             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3309                                  MachinePointerInfo(&*FuncArg));
3310           }
3311 
3312           MemOps.push_back(Store);
3313         }
3314         // Whether we copied from a register or not, advance the offset
3315         // into the parameter save area by a full doubleword.
3316         ArgOffset += PtrByteSize;
3317         continue;
3318       }
3319 
3320       // The value of the object is its address, which is the address of
3321       // its first stack doubleword.
3322       InVals.push_back(FIN);
3323 
3324       // Store whatever pieces of the object are in registers to memory.
3325       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3326         if (GPR_idx == Num_GPR_Regs)
3327           break;
3328 
3329         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3330         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3331         SDValue Addr = FIN;
3332         if (j) {
3333           SDValue Off = DAG.getConstant(j, dl, PtrVT);
3334           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3335         }
3336         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3337                                      MachinePointerInfo(&*FuncArg, j));
3338         MemOps.push_back(Store);
3339         ++GPR_idx;
3340       }
3341       ArgOffset += ArgSize;
3342       continue;
3343     }
3344 
3345     switch (ObjectVT.getSimpleVT().SimpleTy) {
3346     default: llvm_unreachable("Unhandled argument type!");
3347     case MVT::i1:
3348     case MVT::i32:
3349     case MVT::i64:
3350       if (Flags.isNest()) {
3351         // The 'nest' parameter, if any, is passed in R11.
3352         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3353         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3354 
3355         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3356           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3357 
3358         break;
3359       }
3360 
3361       // These can be scalar arguments or elements of an integer array type
3362       // passed directly.  Clang may use those instead of "byval" aggregate
3363       // types to avoid forcing arguments to memory unnecessarily.
3364       if (GPR_idx != Num_GPR_Regs) {
3365         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3366         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3367 
3368         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3369           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3370           // value to MVT::i64 and then truncate to the correct register size.
3371           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3372       } else {
3373         if (CallConv == CallingConv::Fast)
3374           ComputeArgOffset();
3375 
3376         needsLoad = true;
3377         ArgSize = PtrByteSize;
3378       }
3379       if (CallConv != CallingConv::Fast || needsLoad)
3380         ArgOffset += 8;
3381       break;
3382 
3383     case MVT::f32:
3384     case MVT::f64:
3385       // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
3387       // float aggregates.
3388       if (FPR_idx != Num_FPR_Regs) {
3389         unsigned VReg;
3390 
3391         if (ObjectVT == MVT::f32)
3392           VReg = MF.addLiveIn(FPR[FPR_idx],
3393                               Subtarget.hasP8Vector()
3394                                   ? &PPC::VSSRCRegClass
3395                                   : &PPC::F4RCRegClass);
3396         else
3397           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3398                                                 ? &PPC::VSFRCRegClass
3399                                                 : &PPC::F8RCRegClass);
3400 
3401         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3402         ++FPR_idx;
3403       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3404         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3405         // once we support fp <-> gpr moves.
3406 
3407         // This can only ever happen in the presence of f32 array types,
3408         // since otherwise we never run out of FPRs before running out
3409         // of GPRs.
3410         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3411         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3412 
3413         if (ObjectVT == MVT::f32) {
3414           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3415             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3416                                  DAG.getConstant(32, dl, MVT::i32));
3417           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3418         }
3419 
3420         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3421       } else {
3422         if (CallConv == CallingConv::Fast)
3423           ComputeArgOffset();
3424 
3425         needsLoad = true;
3426       }
3427 
3428       // When passing an array of floats, the array occupies consecutive
3429       // space in the argument area; only round up to the next doubleword
3430       // at the end of the array.  Otherwise, each float takes 8 bytes.
3431       if (CallConv != CallingConv::Fast || needsLoad) {
3432         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3433         ArgOffset += ArgSize;
3434         if (Flags.isInConsecutiveRegsLast())
3435           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3436       }
3437       break;
3438     case MVT::v4f32:
3439     case MVT::v4i32:
3440     case MVT::v8i16:
3441     case MVT::v16i8:
3442     case MVT::v2f64:
3443     case MVT::v2i64:
3444     case MVT::v1i128:
3445       if (!Subtarget.hasQPX()) {
3446       // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
3448       // vector aggregates.
3449       if (VR_idx != Num_VR_Regs) {
3450         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3451         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3452         ++VR_idx;
3453       } else {
3454         if (CallConv == CallingConv::Fast)
3455           ComputeArgOffset();
3456 
3457         needsLoad = true;
3458       }
3459       if (CallConv != CallingConv::Fast || needsLoad)
3460         ArgOffset += 16;
3461       break;
3462       } // not QPX
3463 
3464       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3465              "Invalid QPX parameter type");
3466       /* fall through */
3467 
3468     case MVT::v4f64:
3469     case MVT::v4i1:
3470       // QPX vectors are treated like their scalar floating-point subregisters
3471       // (except that they're larger).
3472       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3473       if (QFPR_idx != Num_QFPR_Regs) {
3474         const TargetRegisterClass *RC;
3475         switch (ObjectVT.getSimpleVT().SimpleTy) {
3476         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3477         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3478         default:         RC = &PPC::QBRCRegClass; break;
3479         }
3480 
3481         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3482         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3483         ++QFPR_idx;
3484       } else {
3485         if (CallConv == CallingConv::Fast)
3486           ComputeArgOffset();
3487         needsLoad = true;
3488       }
3489       if (CallConv != CallingConv::Fast || needsLoad)
3490         ArgOffset += Sz;
3491       break;
3492     }
3493 
3494     // We need to load the argument to a virtual register if we determined
3495     // above that we ran out of physical registers of the appropriate type.
3496     if (needsLoad) {
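      // On big-endian targets an object smaller than its stack slot is
      // right-justified within the slot, so adjust the offset before loading.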
3497       if (ObjSize < ArgSize && !isLittleEndian)
3498         CurArgOffset += ArgSize - ObjSize;
3499       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3500       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3501       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3502     }
3503 
3504     InVals.push_back(ArgVal);
3505   }
3506 
3507   // Area that is at least reserved in the caller of this function.
3508   unsigned MinReservedArea;
3509   if (HasParameterArea)
3510     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3511   else
3512     MinReservedArea = LinkageSize;
3513 
  // Set the size that is at least reserved in the caller of this function.  Tail
3515   // call optimized functions' reserved stack space needs to be aligned so that
3516   // taking the difference between two stack areas will result in an aligned
3517   // stack.
3518   MinReservedArea =
3519       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3520   FuncInfo->setMinReservedArea(MinReservedArea);
3521 
  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
3524   if (isVarArg) {
3525     int Depth = ArgOffset;
3526 
3527     FuncInfo->setVarArgsFrameIndex(
3528       MFI.CreateFixedObject(PtrByteSize, Depth, true));
3529     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3530 
3531     // If this function is vararg, store any remaining integer argument regs
3532     // to their spots on the stack so that they may be loaded by dereferencing
3533     // the result of va_next.
3534     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3535          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3536       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3537       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3538       SDValue Store =
3539           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3540       MemOps.push_back(Store);
3541       // Increment the address by four for the next argument to store
3542       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
3543       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3544     }
3545   }
3546 
3547   if (!MemOps.empty())
3548     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3549 
3550   return Chain;
3551 }
3552 
3553 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
3554     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3555     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3556     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3557   // TODO: add description of PPC stack frame format, or at least some docs.
3558   //
3559   MachineFunction &MF = DAG.getMachineFunction();
3560   MachineFrameInfo &MFI = MF.getFrameInfo();
3561   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3562 
3563   EVT PtrVT = getPointerTy(MF.getDataLayout());
3564   bool isPPC64 = PtrVT == MVT::i64;
3565   // Potential tail calls could cause overwriting of argument stack slots.
3566   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3567                        (CallConv == CallingConv::Fast));
3568   unsigned PtrByteSize = isPPC64 ? 8 : 4;
3569   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3570   unsigned ArgOffset = LinkageSize;
3571   // Area that is at least reserved in caller of this function.
3572   unsigned MinReservedArea = ArgOffset;
3573 
3574   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
3575     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3576     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3577   };
3578   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
3579     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3580     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3581   };
3582   static const MCPhysReg VR[] = {
3583     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3584     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3585   };
3586 
3587   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
3588   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3589   const unsigned Num_VR_Regs  = array_lengthof( VR);
3590 
3591   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3592 
3593   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
3594 
  // In 32-bit non-varargs functions, the stack space for vectors comes after
  // the stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples, but we have to walk the argument list to figure
  // that out.  For that pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
3602   unsigned VecArgOffset = ArgOffset;
3603   if (!isVarArg && !isPPC64) {
3604     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
3605          ++ArgNo) {
3606       EVT ObjectVT = Ins[ArgNo].VT;
3607       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3608 
3609       if (Flags.isByVal()) {
        // ObjSize is the true size; ArgSize is ObjSize rounded up to a
        // multiple of the register size.
3611         unsigned ObjSize = Flags.getByValSize();
3612         unsigned ArgSize =
3613                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3614         VecArgOffset += ArgSize;
3615         continue;
3616       }
3617 
3618       switch(ObjectVT.getSimpleVT().SimpleTy) {
3619       default: llvm_unreachable("Unhandled argument type!");
3620       case MVT::i1:
3621       case MVT::i32:
3622       case MVT::f32:
3623         VecArgOffset += 4;
3624         break;
3625       case MVT::i64:  // PPC64
3626       case MVT::f64:
3627         // FIXME: We are guaranteed to be !isPPC64 at this point.
3628         // Does MVT::i64 apply?
3629         VecArgOffset += 8;
3630         break;
3631       case MVT::v4f32:
3632       case MVT::v4i32:
3633       case MVT::v8i16:
3634       case MVT::v16i8:
        // Nothing to do; we're only looking at non-vector args here.
3636         break;
3637       }
3638     }
3639   }
3640   // We've found where the vector parameter area in memory is.  Skip the
3641   // first 12 parameters; these don't use that memory.
3642   VecArgOffset = ((VecArgOffset+15)/16)*16;
3643   VecArgOffset += 12*16;
3644 
3645   // Add DAG nodes to load the arguments or copy them out of registers.  On
3646   // entry to a function on PPC, the arguments start after the linkage area,
3647   // although the first ones are often in registers.
3648 
3649   SmallVector<SDValue, 8> MemOps;
3650   unsigned nAltivecParamsAtEnd = 0;
3651   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3652   unsigned CurArgIdx = 0;
3653   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3654     SDValue ArgVal;
3655     bool needsLoad = false;
3656     EVT ObjectVT = Ins[ArgNo].VT;
3657     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
3658     unsigned ArgSize = ObjSize;
3659     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3660     if (Ins[ArgNo].isOrigArg()) {
3661       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3662       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3663     }
3664     unsigned CurArgOffset = ArgOffset;
3665 
    // Altivec parameters in varargs functions or on 64-bit targets are padded
    // to a 16-byte boundary.
3667     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3668         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3669       if (isVarArg || isPPC64) {
3670         MinReservedArea = ((MinReservedArea+15)/16)*16;
3671         MinReservedArea += CalculateStackSlotSize(ObjectVT,
3672                                                   Flags,
3673                                                   PtrByteSize);
      } else
        nAltivecParamsAtEnd++;
3675     } else
3676       // Calculate min reserved area.
3677       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
3678                                                 Flags,
3679                                                 PtrByteSize);
3680 
3681     // FIXME the codegen can be much improved in some cases.
3682     // We do not have to keep everything in memory.
3683     if (Flags.isByVal()) {
3684       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3685 
      // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple
      // of the register size.
3687       ObjSize = Flags.getByValSize();
3688       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3689       // Objects of size 1 and 2 are right justified, everything else is
3690       // left justified.  This means the memory address is adjusted forwards.
3691       if (ObjSize==1 || ObjSize==2) {
3692         CurArgOffset = CurArgOffset + (4 - ObjSize);
3693       }
3694       // The value of the object is its address.
3695       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
3696       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3697       InVals.push_back(FIN);
3698       if (ObjSize==1 || ObjSize==2) {
3699         if (GPR_idx != Num_GPR_Regs) {
3700           unsigned VReg;
3701           if (isPPC64)
3702             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3703           else
3704             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3705           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3706           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
3707           SDValue Store =
3708               DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
3709                                 MachinePointerInfo(&*FuncArg), ObjType);
3710           MemOps.push_back(Store);
3711           ++GPR_idx;
3712         }
3713 
3714         ArgOffset += PtrByteSize;
3715 
3716         continue;
3717       }
3718       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3719         // Store whatever pieces of the object are in registers
3720         // to memory.  ArgOffset will be the address of the beginning
3721         // of the object.
3722         if (GPR_idx != Num_GPR_Regs) {
3723           unsigned VReg;
3724           if (isPPC64)
3725             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3726           else
3727             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3728           int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3729           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3730           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3731           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3732                                        MachinePointerInfo(&*FuncArg, j));
3733           MemOps.push_back(Store);
3734           ++GPR_idx;
3735           ArgOffset += PtrByteSize;
3736         } else {
3737           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
3738           break;
3739         }
3740       }
3741       continue;
3742     }
3743 
3744     switch (ObjectVT.getSimpleVT().SimpleTy) {
3745     default: llvm_unreachable("Unhandled argument type!");
3746     case MVT::i1:
3747     case MVT::i32:
3748       if (!isPPC64) {
3749         if (GPR_idx != Num_GPR_Regs) {
3750           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3751           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3752 
3753           if (ObjectVT == MVT::i1)
3754             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
3755 
3756           ++GPR_idx;
3757         } else {
3758           needsLoad = true;
3759           ArgSize = PtrByteSize;
3760         }
3761         // All int arguments reserve stack space in the Darwin ABI.
3762         ArgOffset += PtrByteSize;
3763         break;
3764       }
3765       LLVM_FALLTHROUGH;
3766     case MVT::i64:  // PPC64
3767       if (GPR_idx != Num_GPR_Regs) {
3768         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3769         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3770 
3771         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3772           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3773           // value to MVT::i64 and then truncate to the correct register size.
3774           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3775 
3776         ++GPR_idx;
3777       } else {
3778         needsLoad = true;
3779         ArgSize = PtrByteSize;
3780       }
3781       // All int arguments reserve stack space in the Darwin ABI.
3782       ArgOffset += 8;
3783       break;
3784 
3785     case MVT::f32:
3786     case MVT::f64:
3787       // Every 4 bytes of argument space consumes one of the GPRs available for
3788       // argument passing.
3789       if (GPR_idx != Num_GPR_Regs) {
3790         ++GPR_idx;
3791         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
3792           ++GPR_idx;
3793       }
3794       if (FPR_idx != Num_FPR_Regs) {
3795         unsigned VReg;
3796 
3797         if (ObjectVT == MVT::f32)
3798           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
3799         else
3800           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
3801 
3802         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3803         ++FPR_idx;
3804       } else {
3805         needsLoad = true;
3806       }
3807 
3808       // All FP arguments reserve stack space in the Darwin ABI.
3809       ArgOffset += isPPC64 ? 8 : ObjSize;
3810       break;
3811     case MVT::v4f32:
3812     case MVT::v4i32:
3813     case MVT::v8i16:
3814     case MVT::v16i8:
3815       // Note that vector arguments in registers don't reserve stack space,
3816       // except in varargs functions.
3817       if (VR_idx != Num_VR_Regs) {
3818         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3819         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3820         if (isVarArg) {
3821           while ((ArgOffset % 16) != 0) {
3822             ArgOffset += PtrByteSize;
3823             if (GPR_idx != Num_GPR_Regs)
3824               GPR_idx++;
3825           }
3826           ArgOffset += 16;
3827           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
3828         }
3829         ++VR_idx;
3830       } else {
3831         if (!isVarArg && !isPPC64) {
3832           // Vectors go after all the nonvectors.
3833           CurArgOffset = VecArgOffset;
3834           VecArgOffset += 16;
3835         } else {
3836           // Vectors are aligned.
3837           ArgOffset = ((ArgOffset+15)/16)*16;
3838           CurArgOffset = ArgOffset;
3839           ArgOffset += 16;
3840         }
3841         needsLoad = true;
3842       }
3843       break;
3844     }
3845 
3846     // We need to load the argument to a virtual register if we determined above
3847     // that we ran out of physical registers of the appropriate type.
3848     if (needsLoad) {
3849       int FI = MFI.CreateFixedObject(ObjSize,
3850                                      CurArgOffset + (ArgSize - ObjSize),
3851                                      isImmutable);
3852       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3853       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3854     }
3855 
3856     InVals.push_back(ArgVal);
3857   }
3858 
3859   // Allow for Altivec parameters at the end, if needed.
3860   if (nAltivecParamsAtEnd) {
3861     MinReservedArea = ((MinReservedArea+15)/16)*16;
3862     MinReservedArea += 16*nAltivecParamsAtEnd;
3863   }
3864 
3865   // Area that is at least reserved in the caller of this function.
3866   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
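  // For example (illustrative, assuming the 48-byte 64-bit Darwin linkage
  // area): the floor here is 48 + 8 * 8 = 112 bytes.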
3867 
3868   // Set the size that is at least reserved in caller of this function.  Tail
3869   // call optimized functions' reserved stack space needs to be aligned so that
3870   // taking the difference between two stack areas will result in an aligned
3871   // stack.
3872   MinReservedArea =
3873       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3874   FuncInfo->setMinReservedArea(MinReservedArea);
3875 
3876   // If the function takes variable number of arguments, make a frame index for
3877   // the start of the first vararg value... for expansion of llvm.va_start.
3878   if (isVarArg) {
3879     int Depth = ArgOffset;
3880 
3881     FuncInfo->setVarArgsFrameIndex(
3882       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3883                             Depth, true));
3884     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3885 
3886     // If this function is vararg, store any remaining integer argument regs
3887     // to their spots on the stack so that they may be loaded by dereferencing
3888     // the result of va_next.
3889     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3890       unsigned VReg;
3891 
3892       if (isPPC64)
3893         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3894       else
3895         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3896 
3897       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3898       SDValue Store =
3899           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3900       MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
3902       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3903       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3904     }
3905   }
3906 
3907   if (!MemOps.empty())
3908     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3909 
3910   return Chain;
3911 }
3912 
3913 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
3914 /// adjusted to accommodate the arguments for the tailcall.
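/// For example (illustrative): if the caller's reserved area is 112 bytes and
/// the outgoing call only needs 64 bytes, SPDiff is 112 - 64 = 48.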
3915 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
3916                                    unsigned ParamSize) {
3917 
3918   if (!isTailCall) return 0;
3919 
3920   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3921   unsigned CallerMinReservedArea = FI->getMinReservedArea();
3922   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
3924   if (SPDiff < FI->getTailCallSPDelta())
3925     FI->setTailCallSPDelta(SPDiff);
3926 
3927   return SPDiff;
3928 }
3929 
3930 static bool isFunctionGlobalAddress(SDValue Callee);
3931 
3932 static bool
3933 resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
3934   // If !G, Callee can be an external symbol.
3935   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3936   if (!G) return false;
3937 
3938   const GlobalValue *GV = G->getGlobal();
3939 
3940   if (GV->isDeclaration()) return false;
3941 
3942   switch(GV->getLinkage()) {
  default: llvm_unreachable("unknown linkage type");
3944   case GlobalValue::AvailableExternallyLinkage:
3945   case GlobalValue::ExternalWeakLinkage:
3946     return false;
3947 
  // A callee with weak linkage is allowed if it has hidden or protected
  // visibility.
3950   case GlobalValue::LinkOnceAnyLinkage:
3951   case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
3952   case GlobalValue::WeakAnyLinkage:
3953   case GlobalValue::WeakODRLinkage:     // e.g. c++ template instantiation
3954     if (GV->hasDefaultVisibility())
      return false;
    LLVM_FALLTHROUGH;

3957   case GlobalValue::ExternalLinkage:
3958   case GlobalValue::InternalLinkage:
3959   case GlobalValue::PrivateLinkage:
3960     break;
3961   }
3962 
  // With '-fPIC', calling a function with default visibility needs a 'nop'
  // inserted after the call, regardless of whether the function resides in the
  // same module, so we treat it as being in a different module.
3966   if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
3967     return false;
3968 
3969   return true;
3970 }
3971 
3972 static bool
3973 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
3974                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
3975   assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
3976 
3977   const unsigned PtrByteSize = 8;
3978   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3979 
3980   static const MCPhysReg GPR[] = {
3981     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3982     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3983   };
3984   static const MCPhysReg VR[] = {
3985     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3986     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3987   };
3988 
3989   const unsigned NumGPRs = array_lengthof(GPR);
3990   const unsigned NumFPRs = 13;
3991   const unsigned NumVRs = array_lengthof(VR);
3992   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
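  // For example (illustrative): ParamAreaSize is 64 bytes here, so a call
  // passing nine i64 values overflows it and needs a stack slot for the
  // ninth, causing this function to return true.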
3993 
3994   unsigned NumBytes = LinkageSize;
3995   unsigned AvailableFPRs = NumFPRs;
3996   unsigned AvailableVRs = NumVRs;
3997 
3998   for (const ISD::OutputArg& Param : Outs) {
3999     if (Param.Flags.isNest()) continue;
4000 
4001     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4002                                PtrByteSize, LinkageSize, ParamAreaSize,
4003                                NumBytes, AvailableFPRs, AvailableVRs,
4004                                Subtarget.hasQPX()))
4005       return true;
4006   }
4007   return false;
4008 }
4009 
4010 static bool
4011 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
4012   if (CS->arg_size() != CallerFn->getArgumentList().size())
4013     return false;
4014 
4015   ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
4016   ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
4017   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4018 
4019   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4020     const Value* CalleeArg = *CalleeArgIter;
4021     const Value* CallerArg = &(*CallerArgIter);
4022     if (CalleeArg == CallerArg)
4023       continue;
4024 
4025     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4026     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4027     //      }
    // The 1st argument of the callee is undef and has the same type as the
    // caller's argument.
4029     if (CalleeArg->getType() == CallerArg->getType() &&
4030         isa<UndefValue>(CalleeArg))
4031       continue;
4032 
4033     return false;
4034   }
4035 
4036   return true;
4037 }
4038 
4039 bool
4040 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4041                                     SDValue Callee,
4042                                     CallingConv::ID CalleeCC,
4043                                     ImmutableCallSite *CS,
4044                                     bool isVarArg,
4045                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
4046                                     const SmallVectorImpl<ISD::InputArg> &Ins,
4047                                     SelectionDAG& DAG) const {
4048   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4049 
4050   if (DisableSCO && !TailCallOpt) return false;
4051 
4052   // Variadic argument functions are not supported.
4053   if (isVarArg) return false;
4054 
4055   MachineFunction &MF = DAG.getMachineFunction();
4056   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4057 
  // Tail or sibling call optimization (TCO/SCO) requires the callee and caller
  // to have the same calling convention.
4060   if (CallerCC != CalleeCC) return false;
4061 
  // SCO only supports calls using the C or Fast calling convention.
4063   if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
4064     return false;
4065 
  // A caller with any byval parameter is not supported.
4067   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4068     return false;
4069 
  // Likewise, a callee with any byval parameter is not supported.
  // Note: This is a quick workaround, because in some cases, e.g. when the
  // caller's stack size > the callee's stack size, we are still able to apply
  // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
4074   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4075     return false;
4076 
  // No TCO/SCO on an indirect call because the caller has to restore its TOC.
4078   if (!isFunctionGlobalAddress(Callee) &&
4079       !isa<ExternalSymbolSDNode>(Callee))
4080     return false;
4081 
  // Check if Callee resides in the same module, because for now the PPC64 SVR4
  // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
  // another module.
4085   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4086   if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
4087     return false;
4088 
4089   // TCO allows altering callee ABI, so we don't have to check further.
4090   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4091     return true;
4092 
4093   if (DisableSCO) return false;
4094 
  // If the callee uses the same argument list as the caller, then we can apply
  // SCO in this case. If not, we need to check whether the callee needs the
  // stack for passing arguments.
4098   if (!hasSameArgumentList(MF.getFunction(), CS) &&
4099       needStackSlotPassParameters(Subtarget, Outs)) {
4100     return false;
4101   }
4102 
4103   return true;
4104 }
4105 
4106 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4107 /// for tail call optimization. Targets which want to do tail call
4108 /// optimization should implement this function.
4109 bool
4110 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4111                                                      CallingConv::ID CalleeCC,
4112                                                      bool isVarArg,
4113                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4114                                                      SelectionDAG& DAG) const {
4115   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4116     return false;
4117 
4118   // Variable argument functions are not supported.
4119   if (isVarArg)
4120     return false;
4121 
4122   MachineFunction &MF = DAG.getMachineFunction();
4123   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4124   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4125     // Functions containing by val parameters are not supported.
4126     for (unsigned i = 0; i != Ins.size(); i++) {
4127        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4128        if (Flags.isByVal()) return false;
4129     }
4130 
4131     // Non-PIC/GOT tail calls are supported.
4132     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4133       return true;
4134 
4135     // At the moment we can only do local tail calls (in same module, hidden
4136     // or protected) if we are generating PIC.
4137     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4138       return G->getGlobal()->hasHiddenVisibility()
4139           || G->getGlobal()->hasProtectedVisibility();
4140   }
4141 
4142   return false;
4143 }
4144 
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
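/// For example (illustrative): 0x01FFFFFC qualifies (the low two bits are
/// clear and sign-extending the low 26 bits reproduces the value), while
/// 0x02000000 does not.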
4147 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4148   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4149   if (!C) return nullptr;
4150 
4151   int Addr = C->getZExtValue();
4152   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4153       SignExtend32<26>(Addr) != Addr)
4154     return nullptr;  // Top 6 bits have to be sext of immediate.
4155 
4156   return DAG
4157       .getConstant(
4158           (int)C->getZExtValue() >> 2, SDLoc(Op),
4159           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4160       .getNode();
4161 }
4162 
4163 namespace {
4164 
4165 struct TailCallArgumentInfo {
4166   SDValue Arg;
4167   SDValue FrameIdxOp;
4168   int       FrameIdx;
4169 
4170   TailCallArgumentInfo() : FrameIdx(0) {}
4171 };
4172 }
4173 
4174 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4175 static void StoreTailCallArgumentsToStackSlot(
4176     SelectionDAG &DAG, SDValue Chain,
4177     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4178     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4179   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4180     SDValue Arg = TailCallArgs[i].Arg;
4181     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4182     int FI = TailCallArgs[i].FrameIdx;
4183     // Store relative to framepointer.
4184     MemOpChains.push_back(DAG.getStore(
4185         Chain, dl, Arg, FIN,
4186         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4187   }
4188 }
4189 
4190 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4191 /// the appropriate stack slot for the tail call optimized function call.
4192 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4193                                              SDValue OldRetAddr, SDValue OldFP,
4194                                              int SPDiff, const SDLoc &dl) {
4195   if (SPDiff) {
4196     // Calculate the new stack slot for the return address.
4197     MachineFunction &MF = DAG.getMachineFunction();
4198     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4199     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4200     bool isPPC64 = Subtarget.isPPC64();
4201     int SlotSize = isPPC64 ? 8 : 4;
4202     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4203     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4204                                                          NewRetAddrLoc, true);
4205     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4206     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4207     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4208                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4209 
4210     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4211     // slot as the FP is never overwritten.
4212     if (Subtarget.isDarwinABI()) {
4213       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4214       int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4215                                                          true);
4216       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4217       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4218                            MachinePointerInfo::getFixedStack(
4219                                DAG.getMachineFunction(), NewFPIdx));
4220     }
4221   }
4222   return Chain;
4223 }
4224 
4225 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4226 /// the position of the argument.
4227 static void
4228 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4229                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4230                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4231   int Offset = ArgOffset + SPDiff;
4232   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4233   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4234   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4235   SDValue FIN = DAG.getFrameIndex(FI, VT);
4236   TailCallArgumentInfo Info;
4237   Info.Arg = Arg;
4238   Info.FrameIdxOp = FIN;
4239   Info.FrameIdx = FI;
4240   TailCallArguments.push_back(Info);
4241 }
4242 
/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
/// address stack slots. Returns the chain as result, the loaded return address
/// in LROpOut and the loaded frame pointer in FPOpOut. Used when tail calling.
4246 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4247     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4248     SDValue &FPOpOut, const SDLoc &dl) const {
4249   if (SPDiff) {
4250     // Load the LR and FP stack slot for later adjusting.
4251     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4252     LROpOut = getReturnAddrFrameIndex(DAG);
4253     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4254     Chain = SDValue(LROpOut.getNode(), 1);
4255 
4256     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4257     // slot as the FP is never overwritten.
4258     if (Subtarget.isDarwinABI()) {
4259       FPOpOut = getFramePointerFrameIndex(DAG);
4260       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4261       Chain = SDValue(FPOpOut.getNode(), 1);
4262     }
4263   }
4264   return Chain;
4265 }
4266 
/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to the address "Dst" of size "Size".  Alignment
/// information is specified by the specific parameter attribute. The copy will
/// be passed as a byval function parameter.
4271 /// Sometimes what we are copying is the end of a larger object, the part that
4272 /// does not fit in registers.
4273 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4274                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4275                                          SelectionDAG &DAG, const SDLoc &dl) {
4276   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4277   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4278                        false, false, false, MachinePointerInfo(),
4279                        MachinePointerInfo());
4280 }
4281 
4282 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4283 /// tail calls.
4284 static void LowerMemOpCallTo(
4285     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4286     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4287     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4288     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4289   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4290   if (!isTailCall) {
4291     if (isVector) {
4292       SDValue StackPtr;
4293       if (isPPC64)
4294         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4295       else
4296         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4297       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4298                            DAG.getConstant(ArgOffset, dl, PtrVT));
4299     }
4300     MemOpChains.push_back(
4301         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
  } else
    // Calculate and remember argument location.
    CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                             TailCallArguments);
4305 }
4306 
4307 static void
4308 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4309                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4310                 SDValue FPOp,
4311                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4312   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4313   // might overwrite each other in case of tail call optimization.
4314   SmallVector<SDValue, 8> MemOpChains2;
4315   // Do not flag preceding copytoreg stuff together with the following stuff.
4316   InFlag = SDValue();
4317   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4318                                     MemOpChains2, dl);
4319   if (!MemOpChains2.empty())
4320     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4321 
4322   // Store the return address to the appropriate stack slot.
4323   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4324 
4325   // Emit callseq_end just before tailcall node.
4326   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4327                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4328   InFlag = Chain.getValue(1);
4329 }
4330 
4331 // Is this global address that of a function that can be called by name? (as
4332 // opposed to something that must hold a descriptor for an indirect call).
4333 static bool isFunctionGlobalAddress(SDValue Callee) {
4334   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4335     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4336         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4337       return false;
4338 
4339     return G->getGlobal()->getValueType()->isFunctionTy();
4340   }
4341 
4342   return false;
4343 }
4344 
4345 static unsigned
4346 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4347             SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4348             bool isPatchPoint, bool hasNest,
4349             SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4350             SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4351             ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
4352 
4353   bool isPPC64 = Subtarget.isPPC64();
4354   bool isSVR4ABI = Subtarget.isSVR4ABI();
4355   bool isELFv2ABI = Subtarget.isELFv2ABI();
4356 
4357   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4358   NodeTys.push_back(MVT::Other);   // Returns a chain
4359   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
4360 
4361   unsigned CallOpc = PPCISD::CALL;
4362 
4363   bool needIndirectCall = true;
4364   if (!isSVR4ABI || !isPPC64)
4365     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4366       // If this is an absolute destination address, use the munged value.
4367       Callee = SDValue(Dest, 0);
4368       needIndirectCall = false;
4369     }
4370 
4371   // PC-relative references to external symbols should go through $stub, unless
4372   // we're building with the leopard linker or later, which automatically
4373   // synthesizes these stubs.
4374   const TargetMachine &TM = DAG.getTarget();
4375   const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
4376   const GlobalValue *GV = nullptr;
4377   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4378     GV = G->getGlobal();
4379   bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4380   bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4381 
4382   if (isFunctionGlobalAddress(Callee)) {
4383     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4384     // A call to a TLS address is actually an indirect call to a
4385     // thread-specific pointer.
4386     unsigned OpFlags = 0;
4387     if (UsePlt)
4388       OpFlags = PPCII::MO_PLT;
4389 
4390     // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4391     // every direct call is) turn it into a TargetGlobalAddress /
4392     // TargetExternalSymbol node so that legalize doesn't hack it.
4393     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4394                                         Callee.getValueType(), 0, OpFlags);
4395     needIndirectCall = false;
4396   }
4397 
4398   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4399     unsigned char OpFlags = 0;
4400 
4401     if (UsePlt)
4402       OpFlags = PPCII::MO_PLT;
4403 
4404     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4405                                          OpFlags);
4406     needIndirectCall = false;
4407   }
4408 
4409   if (isPatchPoint) {
4410     // We'll form an invalid direct call when lowering a patchpoint; the full
4411     // sequence for an indirect call is complicated, and many of the
4412     // instructions introduced might have side effects (and, thus, can't be
4413     // removed later). The call itself will be removed as soon as the
4414     // argument/return lowering is complete, so the fact that it has the wrong
4415     // kind of operands should not really matter.
4416     needIndirectCall = false;
4417   }
4418 
4419   if (needIndirectCall) {
4420     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
4421     // to do the call, we can't use PPCISD::CALL.
4422     SDValue MTCTROps[] = {Chain, Callee, InFlag};
4423 
4424     if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4425       // Function pointers in the 64-bit SVR4 ABI do not point to the function
4426       // entry point, but to the function descriptor (the function entry point
4427       // address is part of the function descriptor though).
4428       // The function descriptor is a three doubleword structure with the
4429       // following fields: function entry point, TOC base address and
4430       // environment pointer.
4431       // Thus for a call through a function pointer, the following actions need
4432       // to be performed:
4433       //   1. Save the TOC of the caller in the TOC save area of its stack
4434       //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4435       //   2. Load the address of the function entry point from the function
4436       //      descriptor.
4437       //   3. Load the TOC of the callee from the function descriptor into r2.
4438       //   4. Load the environment pointer from the function descriptor into
4439       //      r11.
4440       //   5. Branch to the function entry point address.
4441       //   6. On return of the callee, the TOC of the caller needs to be
4442       //      restored (this is done in FinishCall()).
4443       //
4444       // The loads are scheduled at the beginning of the call sequence, and the
4445       // register copies are flagged together to ensure that no other
4446       // operations can be scheduled in between. E.g. without flagging the
4447       // copies together, a TOC access in the caller could be scheduled between
4448       // the assignment of the callee TOC and the branch to the callee, which
4449       // results in the TOC access going through the TOC of the callee instead
4450       // of going through the TOC of the caller, which leads to incorrect code.
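      // Descriptor layout assumed by the loads below (offsets in bytes):
      //   +0 function entry point, +8 TOC base, +16 environment pointer.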
4451 
4452       // Load the address of the function entry point from the function
4453       // descriptor.
4454       SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4455       if (LDChain.getValueType() == MVT::Glue)
4456         LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4457 
4458       auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
4459                           ? (MachineMemOperand::MODereferenceable |
4460                              MachineMemOperand::MOInvariant)
4461                           : MachineMemOperand::MONone;
4462 
4463       MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
4464       SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4465                                         /* Alignment = */ 8, MMOFlags);
4466 
4467       // Load environment pointer into r11.
4468       SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
4469       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4470       SDValue LoadEnvPtr =
4471           DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
4472                       /* Alignment = */ 8, MMOFlags);
4473 
4474       SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
4475       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4476       SDValue TOCPtr =
4477           DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
4478                       /* Alignment = */ 8, MMOFlags);
4479 
4480       setUsesTOCBasePtr(DAG);
4481       SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
4482                                         InFlag);
4483       Chain = TOCVal.getValue(0);
4484       InFlag = TOCVal.getValue(1);
4485 
4486       // If the function call has an explicit 'nest' parameter, it takes the
4487       // place of the environment pointer.
4488       if (!hasNest) {
4489         SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
4490                                           InFlag);
4491 
4492         Chain = EnvVal.getValue(0);
4493         InFlag = EnvVal.getValue(1);
4494       }
4495 
4496       MTCTROps[0] = Chain;
4497       MTCTROps[1] = LoadFuncPtr;
4498       MTCTROps[2] = InFlag;
4499     }
4500 
4501     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
4502                         makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4503     InFlag = Chain.getValue(1);
4504 
4505     NodeTys.clear();
4506     NodeTys.push_back(MVT::Other);
4507     NodeTys.push_back(MVT::Glue);
4508     Ops.push_back(Chain);
4509     CallOpc = PPCISD::BCTRL;
4510     Callee.setNode(nullptr);
4511     // Add use of X11 (holding environment pointer)
4512     if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
4513       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
4514     // Add CTR register as callee so a bctr can be emitted later.
4515     if (isTailCall)
4516       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4517   }
4518 
4519   // If this is a direct call, pass the chain and the callee.
4520   if (Callee.getNode()) {
4521     Ops.push_back(Chain);
4522     Ops.push_back(Callee);
4523   }
4524   // If this is a tail call add stack pointer delta.
4525   if (isTailCall)
4526     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
4527 
4528   // Add argument registers to the end of the list so that they are known live
4529   // into the call.
4530   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4531     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4532                                   RegsToPass[i].second.getValueType()));
4533 
4534   // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
4535   // into the call.
4536   if (isSVR4ABI && isPPC64 && !isPatchPoint) {
4537     setUsesTOCBasePtr(DAG);
4538     Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
4539   }
4540 
4541   return CallOpc;
4542 }
4543 
static bool isLocalCall(const SDValue &Callee) {
4547   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4548     return G->getGlobal()->isStrongDefinitionForLinker();
4549   return false;
4550 }
4551 
4552 SDValue PPCTargetLowering::LowerCallResult(
4553     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4554     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4555     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4556 
4557   SmallVector<CCValAssign, 16> RVLocs;
4558   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4559                     *DAG.getContext());
4560   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4561 
4562   // Copy all of the result registers out of their specified physreg.
4563   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4564     CCValAssign &VA = RVLocs[i];
4565     assert(VA.isRegLoc() && "Can only return in registers!");
4566 
4567     SDValue Val = DAG.getCopyFromReg(Chain, dl,
4568                                      VA.getLocReg(), VA.getLocVT(), InFlag);
4569     Chain = Val.getValue(1);
4570     InFlag = Val.getValue(2);
4571 
4572     switch (VA.getLocInfo()) {
4573     default: llvm_unreachable("Unknown loc info!");
4574     case CCValAssign::Full: break;
4575     case CCValAssign::AExt:
4576       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4577       break;
4578     case CCValAssign::ZExt:
4579       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4580                         DAG.getValueType(VA.getValVT()));
4581       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4582       break;
4583     case CCValAssign::SExt:
4584       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4585                         DAG.getValueType(VA.getValVT()));
4586       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4587       break;
4588     }
4589 
4590     InVals.push_back(Val);
4591   }
4592 
4593   return Chain;
4594 }
4595 
4596 SDValue PPCTargetLowering::FinishCall(
4597     CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
4598     bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
4599     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
4600     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
4601     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
4602     SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
4603 
4604   std::vector<EVT> NodeTys;
4605   SmallVector<SDValue, 8> Ops;
4606   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4607                                  SPDiff, isTailCall, isPatchPoint, hasNest,
4608                                  RegsToPass, Ops, NodeTys, CS, Subtarget);
4609 
4610   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4611   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4612     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4613 
4614   // When performing tail call optimization the callee pops its arguments off
4615   // the stack. Account for this here so these bytes can be pushed back on in
4616   // PPCFrameLowering::eliminateCallFramePseudoInstr.
4617   int BytesCalleePops =
4618     (CallConv == CallingConv::Fast &&
4619      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4620 
4621   // Add a register mask operand representing the call-preserved registers.
4622   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4623   const uint32_t *Mask =
4624       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4625   assert(Mask && "Missing call preserved mask for calling convention");
4626   Ops.push_back(DAG.getRegisterMask(Mask));
4627 
4628   if (InFlag.getNode())
4629     Ops.push_back(InFlag);
4630 
4631   // Emit tail call.
4632   if (isTailCall) {
4633     assert(((Callee.getOpcode() == ISD::Register &&
4634              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4635             Callee.getOpcode() == ISD::TargetExternalSymbol ||
4636             Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
    "Expecting a global address, external symbol, absolute value or register");
4639 
4640     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
4641     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4642   }
4643 
4644   // Add a NOP immediately after the branch instruction when using the 64-bit
4645   // SVR4 ABI. At link time, if caller and callee are in a different module and
4646   // thus have a different TOC, the call will be replaced with a call to a stub
4647   // function which saves the current TOC, loads the TOC of the callee and
4648   // branches to the callee. The NOP will be replaced with a load instruction
4649   // which restores the TOC of the caller from the TOC save slot of the current
4650   // stack frame. If caller and callee belong to the same module (and have the
4651   // same TOC), the NOP will remain unchanged.
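  // An illustrative sketch (not the exact code the linker emits):
  //   bl callee          ->  bl callee_stub
  //   nop                ->  ld r2, TOC-save-slot(r1)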
4652 
  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
4654       !isPatchPoint) {
4655     if (CallOpc == PPCISD::BCTRL) {
4656       // This is a call through a function pointer.
4657       // Restore the caller TOC from the save area into R2.
4658       // See PrepareCall() for more information about calls through function
4659       // pointers in the 64-bit SVR4 ABI.
4660       // We are using a target-specific load with r2 hard coded, because the
4661       // result of a target-independent load would never go directly into r2,
4662       // since r2 is a reserved register (which prevents the register allocator
4663       // from allocating it), resulting in an additional register being
4664       // allocated and an unnecessary move instruction being generated.
4665       CallOpc = PPCISD::BCTRL_LOAD_TOC;
4666 
4667       EVT PtrVT = getPointerTy(DAG.getDataLayout());
4668       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4669       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4670       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4671       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4672 
4673       // The address needs to go after the chain input but before the flag (or
4674       // any other variadic arguments).
4675       Ops.insert(std::next(Ops.begin()), AddTOC);
4676     } else if ((CallOpc == PPCISD::CALL) &&
4677                (!isLocalCall(Callee) ||
4678                 DAG.getTarget().getRelocationModel() == Reloc::PIC_))
4679       // Otherwise insert NOP for non-local calls.
4680       CallOpc = PPCISD::CALL_NOP;
4681   }
4682 
4683   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
4684   InFlag = Chain.getValue(1);
4685 
4686   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4687                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
4688                              InFlag, dl);
4689   if (!Ins.empty())
4690     InFlag = Chain.getValue(1);
4691 
4692   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
4693                          Ins, dl, DAG, InVals);
4694 }
4695 
4696 SDValue
4697 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4698                              SmallVectorImpl<SDValue> &InVals) const {
4699   SelectionDAG &DAG                     = CLI.DAG;
4700   SDLoc &dl                             = CLI.DL;
4701   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4702   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
4703   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
4704   SDValue Chain                         = CLI.Chain;
4705   SDValue Callee                        = CLI.Callee;
4706   bool &isTailCall                      = CLI.IsTailCall;
4707   CallingConv::ID CallConv              = CLI.CallConv;
4708   bool isVarArg                         = CLI.IsVarArg;
4709   bool isPatchPoint                     = CLI.IsPatchPoint;
4710   ImmutableCallSite *CS                 = CLI.CS;
4711 
4712   if (isTailCall) {
4713     if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall()))
4714       isTailCall = false;
4715     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
4716       isTailCall =
4717         IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
4718                                                  isVarArg, Outs, Ins, DAG);
4719     else
4720       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
4721                                                      Ins, DAG);
4722     if (isTailCall) {
4723       ++NumTailCalls;
4724       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4725         ++NumSiblingCalls;
4726 
4727       assert(isa<GlobalAddressSDNode>(Callee) &&
4728              "Callee should be an llvm::Function object.");
4729       DEBUG(
4730         const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
4731         const unsigned Width = 80 - strlen("TCO caller: ")
4732                                   - strlen(", callee linkage: 0, 0");
4733         dbgs() << "TCO caller: "
4734                << left_justify(DAG.getMachineFunction().getName(), Width)
4735                << ", callee linkage: "
4736                << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
4737       );
4738     }
4739   }
4740 
4741   if (!isTailCall && CS && CS->isMustTailCall())
4742     report_fatal_error("failed to perform tail call elimination on a call "
4743                        "site marked musttail");
4744 
  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via a function pointer. If we have a function name, first translate
  // it into a pointer.
4748   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
4749       !isTailCall)
4750     Callee = LowerGlobalAddress(Callee, DAG);
4751 
4752   if (Subtarget.isSVR4ABI()) {
4753     if (Subtarget.isPPC64())
4754       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
4755                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
4756                               dl, DAG, InVals, CS);
4757     else
4758       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
4759                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
4760                               dl, DAG, InVals, CS);
4761   }
4762 
4763   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
4764                           isTailCall, isPatchPoint, Outs, OutVals, Ins,
4765                           dl, DAG, InVals, CS);
4766 }
4767 
4768 SDValue PPCTargetLowering::LowerCall_32SVR4(
4769     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
4770     bool isTailCall, bool isPatchPoint,
4771     const SmallVectorImpl<ISD::OutputArg> &Outs,
4772     const SmallVectorImpl<SDValue> &OutVals,
4773     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4774     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
4775     ImmutableCallSite *CS) const {
4776   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
4777   // of the 32-bit SVR4 ABI stack frame layout.
4778 
4779   assert((CallConv == CallingConv::C ||
4780           CallConv == CallingConv::Fast) && "Unknown calling convention!");
4781 
4782   unsigned PtrByteSize = 4;
4783 
4784   MachineFunction &MF = DAG.getMachineFunction();
4785 
  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence, the frame pointer will be used for dynamic
  // stack allocation and for restoring the caller's stack pointer in this
  // function's epilog. This is done because the tail-called function might
  // overwrite the value in this function's (MF) stack pointer stack slot
  // 0(SP).
4791   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4792       CallConv == CallingConv::Fast)
4793     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4794 
4795   // Count how many bytes are to be pushed on the stack, including the linkage
4796   // area, parameter list area and the part of the local variable space which
4797   // contains copies of aggregates which are passed by value.
4798 
4799   // Assign locations to all of the outgoing arguments.
4800   SmallVector<CCValAssign, 16> ArgLocs;
4801   PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
4802 
4803   // Reserve space for the linkage area on the stack.
4804   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
4805                        PtrByteSize);
4806   if (useSoftFloat())
4807     CCInfo.PreAnalyzeCallOperands(Outs);
4808 
4809   if (isVarArg) {
4810     // Handle fixed and variable vector arguments differently.
4811     // Fixed vector arguments go into registers as long as registers are
4812     // available. Variable vector arguments always go into memory.
4813     unsigned NumArgs = Outs.size();
4814 
4815     for (unsigned i = 0; i != NumArgs; ++i) {
4816       MVT ArgVT = Outs[i].VT;
4817       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4818       bool Result;
4819 
4820       if (Outs[i].IsFixed) {
4821         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
4822                                CCInfo);
4823       } else {
4824         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
4825                                       ArgFlags, CCInfo);
4826       }
4827 
4828       if (Result) {
4829 #ifndef NDEBUG
4830         errs() << "Call operand #" << i << " has unhandled type "
4831              << EVT(ArgVT).getEVTString() << "\n";
4832 #endif
4833         llvm_unreachable(nullptr);
4834       }
4835     }
4836   } else {
4837     // All arguments are treated the same.
4838     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
4839   }
4840   CCInfo.clearWasPPCF128();
4841 
4842   // Assign locations to all of the outgoing aggregate by value arguments.
4843   SmallVector<CCValAssign, 16> ByValArgLocs;
4844   CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
4845 
4846   // Reserve stack space for the allocations in CCInfo.
4847   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
4848 
4849   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
4850 
  // Size of the linkage area, parameter list area, and the part of the local
  // variable space where copies of aggregates passed by value are stored.
4854   unsigned NumBytes = CCByValInfo.getNextStackOffset();
4855 
4856   // Calculate by how many bytes the stack has to be adjusted in case of tail
4857   // call optimization.
4858   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4859 
4860   // Adjust the stack pointer for the new arguments...
4861   // These operations are automatically eliminated by the prolog/epilog pass
4862   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4863                                dl);
4864   SDValue CallSeqStart = Chain;
4865 
  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
4868   SDValue LROp, FPOp;
4869   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
4870 
4871   // Set up a copy of the stack pointer for use loading and storing any
4872   // arguments that may not fit in the registers available for argument
4873   // passing.
4874   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4875 
4876   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4877   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4878   SmallVector<SDValue, 8> MemOpChains;
4879 
4880   bool seenFloatArg = false;
4881   // Walk the register/memloc assignments, inserting copies/loads.
4882   for (unsigned i = 0, j = 0, e = ArgLocs.size();
4883        i != e;
4884        ++i) {
4885     CCValAssign &VA = ArgLocs[i];
4886     SDValue Arg = OutVals[i];
4887     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4888 
4889     if (Flags.isByVal()) {
4890       // Argument is an aggregate which is passed by value, thus we need to
4891       // create a copy of it in the local variable space of the current stack
4892       // frame (which is the stack frame of the caller) and pass the address of
4893       // this copy to the callee.
4894       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
4895       CCValAssign &ByValVA = ByValArgLocs[j++];
4896       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
4897 
      // Memory reserved in the local variable space of the caller's stack frame.
4899       unsigned LocMemOffset = ByValVA.getLocMemOffset();
4900 
4901       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4902       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4903                            StackPtr, PtrOff);
4904 
4905       // Create a copy of the argument in the local area of the current
4906       // stack frame.
4907       SDValue MemcpyCall =
4908         CreateCopyOfByValArgument(Arg, PtrOff,
4909                                   CallSeqStart.getNode()->getOperand(0),
4910                                   Flags, DAG, dl);
4911 
4912       // This must go outside the CALLSEQ_START..END.
4913       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4914                            CallSeqStart.getNode()->getOperand(1),
4915                            SDLoc(MemcpyCall));
4916       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4917                              NewCallSeqStart.getNode());
4918       Chain = CallSeqStart = NewCallSeqStart;
4919 
4920       // Pass the address of the aggregate copy on the stack either in a
4921       // physical register or in the parameter list area of the current stack
4922       // frame to the callee.
4923       Arg = PtrOff;
4924     }
4925 
4926     if (VA.isRegLoc()) {
4927       if (Arg.getValueType() == MVT::i1)
4928         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
4929 
4930       seenFloatArg |= VA.getLocVT().isFloatingPoint();
4931       // Put argument in a physical register.
4932       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4933     } else {
4934       // Put argument in the parameter list area of the current stack frame.
4935       assert(VA.isMemLoc());
4936       unsigned LocMemOffset = VA.getLocMemOffset();
4937 
4938       if (!isTailCall) {
4939         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4940         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4941                              StackPtr, PtrOff);
4942 
4943         MemOpChains.push_back(
4944             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4945       } else {
4946         // Calculate and remember argument location.
4947         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
4948                                  TailCallArguments);
4949       }
4950     }
4951   }
4952 
4953   if (!MemOpChains.empty())
4954     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4955 
4956   // Build a sequence of copy-to-reg nodes chained together with token chain
4957   // and flag operands which copy the outgoing args into the appropriate regs.
4958   SDValue InFlag;
4959   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4960     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4961                              RegsToPass[i].second, InFlag);
4962     InFlag = Chain.getValue(1);
4963   }
4964 
4965   // Set CR bit 6 to true if this is a vararg call with floating args passed in
4966   // registers.
4967   if (isVarArg) {
4968     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
4969     SDValue Ops[] = { Chain, InFlag };
4970 
4971     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
4972                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
4973 
4974     InFlag = Chain.getValue(1);
4975   }
4976 
4977   if (isTailCall)
4978     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
4979                     TailCallArguments);
4980 
4981   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
4982                     /* unused except on PPC64 ELFv1 */ false, DAG,
4983                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
4984                     NumBytes, Ins, InVals, CS);
4985 }
4986 
4987 // Copy an argument into memory, being careful to do this outside the
4988 // call sequence for the call to which the argument belongs.
4989 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
4990     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
4991     SelectionDAG &DAG, const SDLoc &dl) const {
4992   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
4993                         CallSeqStart.getNode()->getOperand(0),
4994                         Flags, DAG, dl);
4995   // The MEMCPY must go outside the CALLSEQ_START..END.
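  // (A byval copy may itself be lowered to a call to memcpy, and call
  // sequences cannot nest, so the copy is chained in before the original
  // CALLSEQ_START and a fresh CALLSEQ_START is spliced in after it.)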
4996   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4997                              CallSeqStart.getNode()->getOperand(1),
4998                              SDLoc(MemcpyCall));
4999   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5000                          NewCallSeqStart.getNode());
5001   return NewCallSeqStart;
5002 }
5003 
5004 SDValue PPCTargetLowering::LowerCall_64SVR4(
5005     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5006     bool isTailCall, bool isPatchPoint,
5007     const SmallVectorImpl<ISD::OutputArg> &Outs,
5008     const SmallVectorImpl<SDValue> &OutVals,
5009     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5010     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5011     ImmutableCallSite *CS) const {
5012 
5013   bool isELFv2ABI = Subtarget.isELFv2ABI();
5014   bool isLittleEndian = Subtarget.isLittleEndian();
5015   unsigned NumOps = Outs.size();
5016   bool hasNest = false;
5017   bool IsSibCall = false;
5018 
5019   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5020   unsigned PtrByteSize = 8;
5021 
5022   MachineFunction &MF = DAG.getMachineFunction();
5023 
5024   if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5025     IsSibCall = true;
5026 
  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // stack allocation and for restoring the caller's stack pointer in this
  // function's epilog. This is done because the tail-called function might
  // overwrite the value in this function's (MF) stack pointer stack slot
  // 0(SP).
5032   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5033       CallConv == CallingConv::Fast)
5034     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5035 
5036   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5037          "fastcc not supported on varargs functions");
5038 
5039   // Count how many bytes are to be pushed on the stack, including the linkage
5040   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5041   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5042   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5043   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5044   unsigned NumBytes = LinkageSize;
5045   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5046   unsigned &QFPR_idx = FPR_idx;
5047 
5048   static const MCPhysReg GPR[] = {
5049     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5050     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5051   };
5052   static const MCPhysReg VR[] = {
5053     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5054     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5055   };
5056 
5057   const unsigned NumGPRs = array_lengthof(GPR);
5058   const unsigned NumFPRs = 13;
5059   const unsigned NumVRs  = array_lengthof(VR);
5060   const unsigned NumQFPRs = NumFPRs;
5061 
5062   // When using the fast calling convention, we don't provide backing for
5063   // arguments that will be in registers.
5064   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5065 
5066   // Add up all the space actually used.
5067   for (unsigned i = 0; i != NumOps; ++i) {
5068     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5069     EVT ArgVT = Outs[i].VT;
5070     EVT OrigVT = Outs[i].ArgVT;
5071 
5072     if (Flags.isNest())
5073       continue;
5074 
5075     if (CallConv == CallingConv::Fast) {
5076       if (Flags.isByVal())
5077         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5078       else
5079         switch (ArgVT.getSimpleVT().SimpleTy) {
5080         default: llvm_unreachable("Unexpected ValueType for argument!");
5081         case MVT::i1:
5082         case MVT::i32:
5083         case MVT::i64:
5084           if (++NumGPRsUsed <= NumGPRs)
5085             continue;
5086           break;
5087         case MVT::v4i32:
5088         case MVT::v8i16:
5089         case MVT::v16i8:
5090         case MVT::v2f64:
5091         case MVT::v2i64:
5092         case MVT::v1i128:
5093           if (++NumVRsUsed <= NumVRs)
5094             continue;
5095           break;
5096         case MVT::v4f32:
5097           // When using QPX, this is handled like a FP register, otherwise, it
5098           // is an Altivec register.
5099           if (Subtarget.hasQPX()) {
5100             if (++NumFPRsUsed <= NumFPRs)
5101               continue;
5102           } else {
5103             if (++NumVRsUsed <= NumVRs)
5104               continue;
5105           }
5106           break;
5107         case MVT::f32:
5108         case MVT::f64:
5109         case MVT::v4f64: // QPX
5110         case MVT::v4i1:  // QPX
5111           if (++NumFPRsUsed <= NumFPRs)
5112             continue;
5113           break;
5114         }
5115     }
5116 
5117     /* Respect alignment of argument on the stack.  */
5118     unsigned Align =
5119       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5120     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5121 
5122     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5123     if (Flags.isInConsecutiveRegsLast())
5124       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5125   }
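  // In the loop above, for example, a 16-byte-aligned vector argument reached
  // with NumBytes == 52 first rounds NumBytes up to 64 before adding its own
  // 16 bytes, so alignment padding is counted in the total as well.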
5126 
5127   unsigned NumBytesActuallyUsed = NumBytes;
5128 
  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs.
5131   // Because we cannot tell if this is needed on the caller side, we have to
5132   // conservatively assume that it is needed.  As such, make sure we have at
5133   // least enough stack space for the caller to store the 8 GPRs.
5134   // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
5135   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
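  // For example, on ELFv2 with its 32-byte linkage area this reserves at
  // least 32 + 8 * 8 == 96 bytes, even for a call passing a single argument.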
5136 
5137   // Tail call needs the stack to be aligned.
5138   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5139       CallConv == CallingConv::Fast)
5140     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5141 
5142   int SPDiff = 0;
5143 
5144   // Calculate by how many bytes the stack has to be adjusted in case of tail
5145   // call optimization.
5146   if (!IsSibCall)
5147     SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5148 
5149   // To protect arguments on the stack from being clobbered in a tail call,
5150   // force all the loads to happen before doing any other lowering.
5151   if (isTailCall)
5152     Chain = DAG.getStackArgumentTokenFactor(Chain);
5153 
5154   // Adjust the stack pointer for the new arguments...
5155   // These operations are automatically eliminated by the prolog/epilog pass
5156   if (!IsSibCall)
5157     Chain = DAG.getCALLSEQ_START(Chain,
5158                                  DAG.getIntPtrConstant(NumBytes, dl, true), dl);
5159   SDValue CallSeqStart = Chain;
5160 
  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
5163   SDValue LROp, FPOp;
5164   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5165 
5166   // Set up a copy of the stack pointer for use loading and storing any
5167   // arguments that may not fit in the registers available for argument
5168   // passing.
5169   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5170 
5171   // Figure out which arguments are going to go in registers, and which in
5172   // memory.  Also, if this is a vararg function, floating point operations
5173   // must be stored to our stack, and loaded into integer regs as well, if
5174   // any integer regs are available for argument passing.
5175   unsigned ArgOffset = LinkageSize;
5176 
5177   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5178   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5179 
5180   SmallVector<SDValue, 8> MemOpChains;
5181   for (unsigned i = 0; i != NumOps; ++i) {
5182     SDValue Arg = OutVals[i];
5183     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5184     EVT ArgVT = Outs[i].VT;
5185     EVT OrigVT = Outs[i].ArgVT;
5186 
5187     // PtrOff will be used to store the current argument to the stack if a
5188     // register cannot be found for it.
5189     SDValue PtrOff;
5190 
5191     // We re-align the argument offset for each argument, except when using the
5192     // fast calling convention, when we need to make sure we do that only when
5193     // we'll actually use a stack slot.
5194     auto ComputePtrOff = [&]() {
5195       /* Respect alignment of argument on the stack.  */
5196       unsigned Align =
5197         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5198       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5199 
5200       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5201 
5202       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5203     };
5204 
5205     if (CallConv != CallingConv::Fast) {
5206       ComputePtrOff();
5207 
5208       /* Compute GPR index associated with argument offset.  */
5209       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5210       GPR_idx = std::min(GPR_idx, NumGPRs);
5211     }
5212 
5213     // Promote integers to 64-bit values.
5214     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5215       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5216       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5217       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5218     }
5219 
5220     // FIXME memcpy is used way more than necessary.  Correctness first.
5221     // Note: "by value" is code for passing a structure by value, not
5222     // basic types.
5223     if (Flags.isByVal()) {
5224       // Note: Size includes alignment padding, so
5225       //   struct x { short a; char b; }
5226       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5227       // These are the proper values we need for right-justifying the
5228       // aggregate in a parameter register.
5229       unsigned Size = Flags.getByValSize();
5230 
5231       // An empty aggregate parameter takes up no storage and no
5232       // registers.
5233       if (Size == 0)
5234         continue;
5235 
5236       if (CallConv == CallingConv::Fast)
5237         ComputePtrOff();
5238 
5239       // All aggregates smaller than 8 bytes must be passed right-justified.
5240       if (Size==1 || Size==2 || Size==4) {
5241         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5242         if (GPR_idx != NumGPRs) {
5243           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5244                                         MachinePointerInfo(), VT);
5245           MemOpChains.push_back(Load.getValue(1));
5246           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5247 
5248           ArgOffset += PtrByteSize;
5249           continue;
5250         }
5251       }
5252 
5253       if (GPR_idx == NumGPRs && Size < 8) {
5254         SDValue AddPtr = PtrOff;
5255         if (!isLittleEndian) {
5256           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5257                                           PtrOff.getValueType());
5258           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5259         }
5260         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5261                                                           CallSeqStart,
5262                                                           Flags, DAG, dl);
5263         ArgOffset += PtrByteSize;
5264         continue;
5265       }
5266       // Copy entire object into memory.  There are cases where gcc-generated
5267       // code assumes it is there, even if it could be put entirely into
5268       // registers.  (This is not what the doc says.)
5269 
5270       // FIXME: The above statement is likely due to a misunderstanding of the
5271       // documents.  All arguments must be copied into the parameter area BY
5272       // THE CALLEE in the event that the callee takes the address of any
5273       // formal argument.  That has not yet been implemented.  However, it is
5274       // reasonable to use the stack area as a staging area for the register
5275       // load.
5276 
5277       // Skip this for small aggregates, as we will use the same slot for a
5278       // right-justified copy, below.
5279       if (Size >= 8)
5280         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5281                                                           CallSeqStart,
5282                                                           Flags, DAG, dl);
5283 
5284       // When a register is available, pass a small aggregate right-justified.
5285       if (Size < 8 && GPR_idx != NumGPRs) {
5286         // The easiest way to get this right-justified in a register
5287         // is to copy the structure into the rightmost portion of a
5288         // local variable slot, then load the whole slot into the
5289         // register.
5290         // FIXME: The memcpy seems to produce pretty awful code for
5291         // small aggregates, particularly for packed ones.
5292         // FIXME: It would be preferable to use the slot in the
5293         // parameter save area instead of a new local variable.
5294         SDValue AddPtr = PtrOff;
5295         if (!isLittleEndian) {
5296           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5297           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5298         }
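        // For example, a 3-byte packed aggregate on a big-endian target is
        // copied to PtrOff + 5, i.e. into the last three bytes of the
        // doubleword, so the 8-byte load below yields it right-justified in
        // the GPR.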
5299         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5300                                                           CallSeqStart,
5301                                                           Flags, DAG, dl);
5302 
5303         // Load the slot into the register.
5304         SDValue Load =
5305             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5306         MemOpChains.push_back(Load.getValue(1));
5307         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5308 
5309         // Done with this argument.
5310         ArgOffset += PtrByteSize;
5311         continue;
5312       }
5313 
5314       // For aggregates larger than PtrByteSize, copy the pieces of the
5315       // object that fit into registers from the parameter save area.
5316       for (unsigned j=0; j<Size; j+=PtrByteSize) {
5317         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5318         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5319         if (GPR_idx != NumGPRs) {
5320           SDValue Load =
5321               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5322           MemOpChains.push_back(Load.getValue(1));
5323           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5324           ArgOffset += PtrByteSize;
5325         } else {
5326           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5327           break;
5328         }
5329       }
5330       continue;
5331     }
5332 
5333     switch (Arg.getSimpleValueType().SimpleTy) {
5334     default: llvm_unreachable("Unexpected ValueType for argument!");
5335     case MVT::i1:
5336     case MVT::i32:
5337     case MVT::i64:
5338       if (Flags.isNest()) {
5339         // The 'nest' parameter, if any, is passed in R11.
5340         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5341         hasNest = true;
5342         break;
5343       }
5344 
5345       // These can be scalar arguments or elements of an integer array type
5346       // passed directly.  Clang may use those instead of "byval" aggregate
5347       // types to avoid forcing arguments to memory unnecessarily.
5348       if (GPR_idx != NumGPRs) {
5349         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5350       } else {
5351         if (CallConv == CallingConv::Fast)
5352           ComputePtrOff();
5353 
5354         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5355                          true, isTailCall, false, MemOpChains,
5356                          TailCallArguments, dl);
5357         if (CallConv == CallingConv::Fast)
5358           ArgOffset += PtrByteSize;
5359       }
5360       if (CallConv != CallingConv::Fast)
5361         ArgOffset += PtrByteSize;
5362       break;
5363     case MVT::f32:
5364     case MVT::f64: {
5365       // These can be scalar arguments or elements of a float array type
5366       // passed directly.  The latter are used to implement ELFv2 homogenous
5367       // float aggregates.
5368 
5369       // Named arguments go into FPRs first, and once they overflow, the
5370       // remaining arguments go into GPRs and then the parameter save area.
5371       // Unnamed arguments for vararg functions always go to GPRs and
5372       // then the parameter save area.  For now, put all arguments to vararg
5373       // routines always in both locations (FPR *and* GPR or stack slot).
5374       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5375       bool NeededLoad = false;
5376 
5377       // First load the argument into the next available FPR.
5378       if (FPR_idx != NumFPRs)
5379         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5380 
5381       // Next, load the argument into GPR or stack slot if needed.
5382       if (!NeedGPROrStack)
5383         ;
5384       else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5385         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5386         // once we support fp <-> gpr moves.
5387 
5388         // In the non-vararg case, this can only ever happen in the
5389         // presence of f32 array types, since otherwise we never run
5390         // out of FPRs before running out of GPRs.
5391         SDValue ArgVal;
5392 
5393         // Double values are always passed in a single GPR.
5394         if (Arg.getValueType() != MVT::f32) {
5395           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5396 
5397         // Non-array float values are extended and passed in a GPR.
5398         } else if (!Flags.isInConsecutiveRegs()) {
5399           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5400           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5401 
5402         // If we have an array of floats, we collect every odd element
5403         // together with its predecessor into one GPR.
5404         } else if (ArgOffset % PtrByteSize != 0) {
5405           SDValue Lo, Hi;
5406           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5407           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5408           if (!isLittleEndian)
5409             std::swap(Lo, Hi);
5410           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5411 
5412         // The final element, if even, goes into the first half of a GPR.
5413         } else if (Flags.isInConsecutiveRegsLast()) {
5414           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5415           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5416           if (!isLittleEndian)
5417             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5418                                  DAG.getConstant(32, dl, MVT::i32));
5419 
        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
5422         } else
5423           ArgVal = SDValue();
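        // Taken together, for a homogeneous f32 array {f0, f1, f2} on a
        // big-endian target the cases above pair f0 with f1 in one GPR (f0 in
        // the high word) and shift f2, the final doubleword-aligned element,
        // into the high half of the next GPR.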
5424 
5425         if (ArgVal.getNode())
5426           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5427       } else {
5428         if (CallConv == CallingConv::Fast)
5429           ComputePtrOff();
5430 
5431         // Single-precision floating-point values are mapped to the
5432         // second (rightmost) word of the stack doubleword.
5433         if (Arg.getValueType() == MVT::f32 &&
5434             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5435           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5436           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5437         }
5438 
5439         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5440                          true, isTailCall, false, MemOpChains,
5441                          TailCallArguments, dl);
5442 
5443         NeededLoad = true;
5444       }
5445       // When passing an array of floats, the array occupies consecutive
5446       // space in the argument area; only round up to the next doubleword
5447       // at the end of the array.  Otherwise, each float takes 8 bytes.
5448       if (CallConv != CallingConv::Fast || NeededLoad) {
5449         ArgOffset += (Arg.getValueType() == MVT::f32 &&
5450                       Flags.isInConsecutiveRegs()) ? 4 : 8;
5451         if (Flags.isInConsecutiveRegsLast())
5452           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5453       }
5454       break;
5455     }
5456     case MVT::v4f32:
5457     case MVT::v4i32:
5458     case MVT::v8i16:
5459     case MVT::v16i8:
5460     case MVT::v2f64:
5461     case MVT::v2i64:
5462     case MVT::v1i128:
5463       if (!Subtarget.hasQPX()) {
5464       // These can be scalar arguments or elements of a vector array type
5465       // passed directly.  The latter are used to implement ELFv2 homogenous
5466       // vector aggregates.
5467 
5468       // For a varargs call, named arguments go into VRs or on the stack as
5469       // usual; unnamed arguments always go to the stack or the corresponding
5470       // GPRs when within range.  For now, we always put the value in both
5471       // locations (or even all three).
5472       if (isVarArg) {
5473         // We could elide this store in the case where the object fits
5474         // entirely in R registers.  Maybe later.
5475         SDValue Store =
5476             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5477         MemOpChains.push_back(Store);
5478         if (VR_idx != NumVRs) {
5479           SDValue Load =
5480               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5481           MemOpChains.push_back(Load.getValue(1));
5482           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5483         }
5484         ArgOffset += 16;
5485         for (unsigned i=0; i<16; i+=PtrByteSize) {
5486           if (GPR_idx == NumGPRs)
5487             break;
5488           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5489                                    DAG.getConstant(i, dl, PtrVT));
5490           SDValue Load =
5491               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5492           MemOpChains.push_back(Load.getValue(1));
5493           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5494         }
5495         break;
5496       }
5497 
5498       // Non-varargs Altivec params go into VRs or on the stack.
5499       if (VR_idx != NumVRs) {
5500         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5501       } else {
5502         if (CallConv == CallingConv::Fast)
5503           ComputePtrOff();
5504 
5505         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5506                          true, isTailCall, true, MemOpChains,
5507                          TailCallArguments, dl);
5508         if (CallConv == CallingConv::Fast)
5509           ArgOffset += 16;
5510       }
5511 
5512       if (CallConv != CallingConv::Fast)
5513         ArgOffset += 16;
5514       break;
5515       } // not QPX
5516 
5517       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5518              "Invalid QPX parameter type");
5519 
5520       /* fall through */
5521     case MVT::v4f64:
5522     case MVT::v4i1: {
5523       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5524       if (isVarArg) {
5525         // We could elide this store in the case where the object fits
5526         // entirely in R registers.  Maybe later.
5527         SDValue Store =
5528             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5529         MemOpChains.push_back(Store);
5530         if (QFPR_idx != NumQFPRs) {
5531           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
5532                                      PtrOff, MachinePointerInfo());
5533           MemOpChains.push_back(Load.getValue(1));
5534           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5535         }
5536         ArgOffset += (IsF32 ? 16 : 32);
5537         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5538           if (GPR_idx == NumGPRs)
5539             break;
5540           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5541                                    DAG.getConstant(i, dl, PtrVT));
5542           SDValue Load =
5543               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5544           MemOpChains.push_back(Load.getValue(1));
5545           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5546         }
5547         break;
5548       }
5549 
5550       // Non-varargs QPX params go into registers or on the stack.
5551       if (QFPR_idx != NumQFPRs) {
5552         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5553       } else {
5554         if (CallConv == CallingConv::Fast)
5555           ComputePtrOff();
5556 
5557         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5558                          true, isTailCall, true, MemOpChains,
5559                          TailCallArguments, dl);
5560         if (CallConv == CallingConv::Fast)
5561           ArgOffset += (IsF32 ? 16 : 32);
5562       }
5563 
5564       if (CallConv != CallingConv::Fast)
5565         ArgOffset += (IsF32 ? 16 : 32);
5566       break;
5567       }
5568     }
5569   }
5570 
5571   assert(NumBytesActuallyUsed == ArgOffset);
5572   (void)NumBytesActuallyUsed;
5573 
5574   if (!MemOpChains.empty())
5575     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5576 
5577   // Check if this is an indirect call (MTCTR/BCTRL).
5578   // See PrepareCall() for more information about calls through function
5579   // pointers in the 64-bit SVR4 ABI.
5580   if (!isTailCall && !isPatchPoint &&
5581       !isFunctionGlobalAddress(Callee) &&
5582       !isa<ExternalSymbolSDNode>(Callee)) {
5583     // Load r2 into a virtual register and store it to the TOC save area.
5584     setUsesTOCBasePtr(DAG);
5585     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5586     // TOC save area offset.
5587     unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5588     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5589     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5590     Chain = DAG.getStore(
5591         Val.getValue(1), dl, Val, AddPtr,
5592         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
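    // On ELFv2, for example, the TOC save slot is typically at SP + 24, so
    // this amounts to an "std r2, 24(r1)" ahead of the indirect branch.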
5593     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5594     // This does not mean the MTCTR instruction must use R12; it's easier
5595     // to model this as an extra parameter, so do that.
5596     if (isELFv2ABI && !isPatchPoint)
5597       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5598   }
5599 
5600   // Build a sequence of copy-to-reg nodes chained together with token chain
5601   // and flag operands which copy the outgoing args into the appropriate regs.
5602   SDValue InFlag;
5603   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5604     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5605                              RegsToPass[i].second, InFlag);
5606     InFlag = Chain.getValue(1);
5607   }
5608 
5609   if (isTailCall && !IsSibCall)
5610     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5611                     TailCallArguments);
5612 
5613   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
5614                     DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
5615                     SPDiff, NumBytes, Ins, InVals, CS);
5616 }
5617 
5618 SDValue PPCTargetLowering::LowerCall_Darwin(
5619     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5620     bool isTailCall, bool isPatchPoint,
5621     const SmallVectorImpl<ISD::OutputArg> &Outs,
5622     const SmallVectorImpl<SDValue> &OutVals,
5623     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5624     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5625     ImmutableCallSite *CS) const {
5626 
5627   unsigned NumOps = Outs.size();
5628 
5629   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5630   bool isPPC64 = PtrVT == MVT::i64;
5631   unsigned PtrByteSize = isPPC64 ? 8 : 4;
5632 
5633   MachineFunction &MF = DAG.getMachineFunction();
5634 
  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // stack allocation and for restoring the caller's stack pointer in this
  // function's epilog. This is done because the tail-called function might
  // overwrite the value in this function's (MF) stack pointer stack slot
  // 0(SP).
5640   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5641       CallConv == CallingConv::Fast)
5642     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5643 
5644   // Count how many bytes are to be pushed on the stack, including the linkage
5645   // area, and parameter passing area.  We start with 24/48 bytes, which is
5646   // prereserved space for [SP][CR][LR][3 x unused].
5647   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5648   unsigned NumBytes = LinkageSize;
5649 
5650   // Add up all the space actually used.
5651   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
5652   // they all go in registers, but we must reserve stack space for them for
5653   // possible use by the caller.  In varargs or 64-bit calls, parameters are
5654   // assigned stack space in order, with padding so Altivec parameters are
5655   // 16-byte aligned.
5656   unsigned nAltivecParamsAtEnd = 0;
5657   for (unsigned i = 0; i != NumOps; ++i) {
5658     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5659     EVT ArgVT = Outs[i].VT;
5660     // Varargs Altivec parameters are padded to a 16 byte boundary.
5661     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
5662         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
5663         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
5664       if (!isVarArg && !isPPC64) {
5665         // Non-varargs Altivec parameters go after all the non-Altivec
5666         // parameters; handle those later so we know how much padding we need.
5667         nAltivecParamsAtEnd++;
5668         continue;
5669       }
5670       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
5671       NumBytes = ((NumBytes+15)/16)*16;
5672     }
5673     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5674   }
5675 
5676   // Allow for Altivec parameters at the end, if needed.
5677   if (nAltivecParamsAtEnd) {
5678     NumBytes = ((NumBytes+15)/16)*16;
5679     NumBytes += 16*nAltivecParamsAtEnd;
5680   }
5681 
  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs.
5684   // Because we cannot tell if this is needed on the caller side, we have to
5685   // conservatively assume that it is needed.  As such, make sure we have at
5686   // least enough stack space for the caller to store the 8 GPRs.
5687   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5688 
5689   // Tail call needs the stack to be aligned.
5690   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5691       CallConv == CallingConv::Fast)
5692     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5693 
5694   // Calculate by how many bytes the stack has to be adjusted in case of tail
5695   // call optimization.
5696   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5697 
5698   // To protect arguments on the stack from being clobbered in a tail call,
5699   // force all the loads to happen before doing any other lowering.
5700   if (isTailCall)
5701     Chain = DAG.getStackArgumentTokenFactor(Chain);
5702 
5703   // Adjust the stack pointer for the new arguments...
5704   // These operations are automatically eliminated by the prolog/epilog pass
5705   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5706                                dl);
5707   SDValue CallSeqStart = Chain;
5708 
  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
5711   SDValue LROp, FPOp;
5712   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5713 
5714   // Set up a copy of the stack pointer for use loading and storing any
5715   // arguments that may not fit in the registers available for argument
5716   // passing.
5717   SDValue StackPtr;
5718   if (isPPC64)
5719     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5720   else
5721     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5722 
5723   // Figure out which arguments are going to go in registers, and which in
5724   // memory.  Also, if this is a vararg function, floating point operations
5725   // must be stored to our stack, and loaded into integer regs as well, if
5726   // any integer regs are available for argument passing.
5727   unsigned ArgOffset = LinkageSize;
5728   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5729 
5730   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
5731     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
5732     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
5733   };
5734   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
5735     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5736     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5737   };
5738   static const MCPhysReg VR[] = {
5739     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5740     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5741   };
5742   const unsigned NumGPRs = array_lengthof(GPR_32);
5743   const unsigned NumFPRs = 13;
5744   const unsigned NumVRs  = array_lengthof(VR);
5745 
5746   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
5747 
5748   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5749   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5750 
5751   SmallVector<SDValue, 8> MemOpChains;
5752   for (unsigned i = 0; i != NumOps; ++i) {
5753     SDValue Arg = OutVals[i];
5754     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5755 
5756     // PtrOff will be used to store the current argument to the stack if a
5757     // register cannot be found for it.
5758     SDValue PtrOff;
5759 
5760     PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5761 
5762     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5763 
5764     // On PPC64, promote integers to 64-bit values.
5765     if (isPPC64 && Arg.getValueType() == MVT::i32) {
5766       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5767       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5768       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5769     }
5770 
5771     // FIXME memcpy is used way more than necessary.  Correctness first.
5772     // Note: "by value" is code for passing a structure by value, not
5773     // basic types.
5774     if (Flags.isByVal()) {
5775       unsigned Size = Flags.getByValSize();
5776       // Very small objects are passed right-justified.  Everything else is
5777       // passed left-justified.
5778       if (Size==1 || Size==2) {
5779         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
5780         if (GPR_idx != NumGPRs) {
5781           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5782                                         MachinePointerInfo(), VT);
5783           MemOpChains.push_back(Load.getValue(1));
5784           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5785 
5786           ArgOffset += PtrByteSize;
5787         } else {
5788           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5789                                           PtrOff.getValueType());
5790           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5791           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5792                                                             CallSeqStart,
5793                                                             Flags, DAG, dl);
5794           ArgOffset += PtrByteSize;
5795         }
5796         continue;
5797       }
5798       // Copy entire object into memory.  There are cases where gcc-generated
5799       // code assumes it is there, even if it could be put entirely into
5800       // registers.  (This is not what the doc says.)
5801       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5802                                                         CallSeqStart,
5803                                                         Flags, DAG, dl);
5804 
5805       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
5806       // copy the pieces of the object that fit into registers from the
5807       // parameter save area.
5808       for (unsigned j=0; j<Size; j+=PtrByteSize) {
5809         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5810         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5811         if (GPR_idx != NumGPRs) {
5812           SDValue Load =
5813               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5814           MemOpChains.push_back(Load.getValue(1));
5815           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5816           ArgOffset += PtrByteSize;
5817         } else {
5818           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5819           break;
5820         }
5821       }
5822       continue;
5823     }
5824 
5825     switch (Arg.getSimpleValueType().SimpleTy) {
5826     default: llvm_unreachable("Unexpected ValueType for argument!");
5827     case MVT::i1:
5828     case MVT::i32:
5829     case MVT::i64:
5830       if (GPR_idx != NumGPRs) {
5831         if (Arg.getValueType() == MVT::i1)
5832           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
5833 
5834         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5835       } else {
5836         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5837                          isPPC64, isTailCall, false, MemOpChains,
5838                          TailCallArguments, dl);
5839       }
5840       ArgOffset += PtrByteSize;
5841       break;
5842     case MVT::f32:
5843     case MVT::f64:
5844       if (FPR_idx != NumFPRs) {
5845         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5846 
5847         if (isVarArg) {
5848           SDValue Store =
5849               DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5850           MemOpChains.push_back(Store);
5851 
5852           // Float varargs are always shadowed in available integer registers
5853           if (GPR_idx != NumGPRs) {
5854             SDValue Load =
5855                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
5856             MemOpChains.push_back(Load.getValue(1));
5857             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5858           }
5859           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
5860             SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5861             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5862             SDValue Load =
5863                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
5864             MemOpChains.push_back(Load.getValue(1));
5865             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5866           }
5867         } else {
5868           // If we have any FPRs remaining, we may also have GPRs remaining.
5869           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
5870           // GPRs.
5871           if (GPR_idx != NumGPRs)
5872             ++GPR_idx;
5873           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
5874               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
5875             ++GPR_idx;
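          // For example, on 32-bit Darwin a prototyped f64 passed in an FPR
          // still shadows two GPR argument slots, so the next integer
          // argument skips those two GPRs.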
5876         }
5877       } else
5878         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5879                          isPPC64, isTailCall, false, MemOpChains,
5880                          TailCallArguments, dl);
5881       if (isPPC64)
5882         ArgOffset += 8;
5883       else
5884         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
5885       break;
5886     case MVT::v4f32:
5887     case MVT::v4i32:
5888     case MVT::v8i16:
5889     case MVT::v16i8:
5890       if (isVarArg) {
5891         // These go aligned on the stack, or in the corresponding R registers
5892         // when within range.  The Darwin PPC ABI doc claims they also go in
5893         // V registers; in fact gcc does this only for arguments that are
5894         // prototyped, not for those that match the ...  We do it for all
5895         // arguments, seems to work.
5896         while (ArgOffset % 16 !=0) {
5897           ArgOffset += PtrByteSize;
5898           if (GPR_idx != NumGPRs)
5899             GPR_idx++;
5900         }
5901         // We could elide this store in the case where the object fits
5902         // entirely in R registers.  Maybe later.
5903         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5904                              DAG.getConstant(ArgOffset, dl, PtrVT));
5905         SDValue Store =
5906             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5907         MemOpChains.push_back(Store);
5908         if (VR_idx != NumVRs) {
5909           SDValue Load =
5910               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5911           MemOpChains.push_back(Load.getValue(1));
5912           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5913         }
5914         ArgOffset += 16;
5915         for (unsigned i=0; i<16; i+=PtrByteSize) {
5916           if (GPR_idx == NumGPRs)
5917             break;
5918           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5919                                    DAG.getConstant(i, dl, PtrVT));
5920           SDValue Load =
5921               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5922           MemOpChains.push_back(Load.getValue(1));
5923           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5924         }
5925         break;
5926       }
5927 
5928       // Non-varargs Altivec params generally go in registers, but have
5929       // stack space allocated at the end.
5930       if (VR_idx != NumVRs) {
5931         // Doesn't have GPR space allocated.
5932         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5933       } else if (nAltivecParamsAtEnd==0) {
5934         // We are emitting Altivec params in order.
5935         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5936                          isPPC64, isTailCall, true, MemOpChains,
5937                          TailCallArguments, dl);
5938         ArgOffset += 16;
5939       }
5940       break;
5941     }
5942   }
5943   // If all Altivec parameters fit in registers, as they usually do,
5944   // they get stack space following the non-Altivec parameters.  We
5945   // don't track this here because nobody below needs it.
5946   // If there are more Altivec parameters than fit in registers emit
5947   // the stores here.
5948   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
5949     unsigned j = 0;
5950     // Offset is aligned; skip 1st 12 params which go in V registers.
5951     ArgOffset = ((ArgOffset+15)/16)*16;
5952     ArgOffset += 12*16;
5953     for (unsigned i = 0; i != NumOps; ++i) {
5954       SDValue Arg = OutVals[i];
5955       EVT ArgType = Outs[i].VT;
5956       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
5957           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
5958         if (++j > NumVRs) {
5959           SDValue PtrOff;
5960           // We are emitting Altivec params in order.
5961           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5962                            isPPC64, isTailCall, true, MemOpChains,
5963                            TailCallArguments, dl);
5964           ArgOffset += 16;
5965         }
5966       }
5967     }
5968   }
5969 
5970   if (!MemOpChains.empty())
5971     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5972 
5973   // On Darwin, R12 must contain the address of an indirect callee.  This does
5974   // not mean the MTCTR instruction must use R12; it's easier to model this as
5975   // an extra parameter, so do that.
5976   if (!isTailCall &&
5977       !isFunctionGlobalAddress(Callee) &&
5978       !isa<ExternalSymbolSDNode>(Callee) &&
5979       !isBLACompatibleAddress(Callee, DAG))
5980     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
5981                                                    PPC::R12), Callee));
5982 
5983   // Build a sequence of copy-to-reg nodes chained together with token chain
5984   // and flag operands which copy the outgoing args into the appropriate regs.
5985   SDValue InFlag;
5986   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5987     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5988                              RegsToPass[i].second, InFlag);
5989     InFlag = Chain.getValue(1);
5990   }
5991 
5992   if (isTailCall)
5993     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5994                     TailCallArguments);
5995 
5996   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5997                     /* unused except on PPC64 ELFv1 */ false, DAG,
5998                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5999                     NumBytes, Ins, InVals, CS);
6000 }
6001 
6002 bool
6003 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6004                                   MachineFunction &MF, bool isVarArg,
6005                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
6006                                   LLVMContext &Context) const {
6007   SmallVector<CCValAssign, 16> RVLocs;
6008   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6009   return CCInfo.CheckReturn(Outs, RetCC_PPC);
6010 }
6011 
6012 SDValue
6013 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6014                                bool isVarArg,
6015                                const SmallVectorImpl<ISD::OutputArg> &Outs,
6016                                const SmallVectorImpl<SDValue> &OutVals,
6017                                const SDLoc &dl, SelectionDAG &DAG) const {
6018 
6019   SmallVector<CCValAssign, 16> RVLocs;
6020   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6021                  *DAG.getContext());
6022   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
6023 
6024   SDValue Flag;
6025   SmallVector<SDValue, 4> RetOps(1, Chain);
6026 
6027   // Copy the result values into the output registers.
6028   for (unsigned i = 0; i != RVLocs.size(); ++i) {
6029     CCValAssign &VA = RVLocs[i];
6030     assert(VA.isRegLoc() && "Can only return in registers!");
6031 
6032     SDValue Arg = OutVals[i];
6033 
6034     switch (VA.getLocInfo()) {
6035     default: llvm_unreachable("Unknown loc info!");
6036     case CCValAssign::Full: break;
6037     case CCValAssign::AExt:
6038       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6039       break;
6040     case CCValAssign::ZExt:
6041       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6042       break;
6043     case CCValAssign::SExt:
6044       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6045       break;
6046     }
6047 
6048     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6049     Flag = Chain.getValue(1);
6050     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6051   }
6052 
6053   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6054   const MCPhysReg *I =
6055     TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6056   if (I) {
6057     for (; *I; ++I) {
6058 
6059       if (PPC::G8RCRegClass.contains(*I))
6060         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6061       else if (PPC::F8RCRegClass.contains(*I))
6062         RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6063       else if (PPC::CRRCRegClass.contains(*I))
6064         RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6065       else if (PPC::VRRCRegClass.contains(*I))
6066         RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6067       else
6068         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6069     }
6070   }
6071 
6072   RetOps[0] = Chain;  // Update chain.
6073 
6074   // Add the flag if we have it.
6075   if (Flag.getNode())
6076     RetOps.push_back(Flag);
6077 
6078   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6079 }
6080 
6081 SDValue
6082 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6083                                                 SelectionDAG &DAG) const {
6084   SDLoc dl(Op);
6085 
  // Get the correct type for integers.
6087   EVT IntVT = Op.getValueType();
6088 
6089   // Get the inputs.
6090   SDValue Chain = Op.getOperand(0);
6091   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6092   // Build a DYNAREAOFFSET node.
6093   SDValue Ops[2] = {Chain, FPSIdx};
6094   SDVTList VTs = DAG.getVTList(IntVT);
6095   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6096 }
6097 
6098 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6099                                              SelectionDAG &DAG) const {
6100   // When we pop the dynamic allocation we need to restore the SP link.
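  // Conceptually this emits the equivalent of:
  //   ld  r0, 0(r1)    # load the old back-chain link
  //   mr  r1, <SaveSP> # restore the stack pointer
  //   std r0, 0(r1)    # re-establish the back chain at the new SP
  // (64-bit form shown; the 32-bit path uses lwz/stw instead).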
6101   SDLoc dl(Op);
6102 
  // Get the correct type for pointers.
6104   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6105 
6106   // Construct the stack pointer operand.
6107   bool isPPC64 = Subtarget.isPPC64();
6108   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6109   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6110 
6111   // Get the operands for the STACKRESTORE.
6112   SDValue Chain = Op.getOperand(0);
6113   SDValue SaveSP = Op.getOperand(1);
6114 
6115   // Load the old link SP.
6116   SDValue LoadLinkSP =
6117       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6118 
6119   // Restore the stack pointer.
6120   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6121 
6122   // Store the old link SP.
6123   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6124 }
6125 
6126 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6127   MachineFunction &MF = DAG.getMachineFunction();
6128   bool isPPC64 = Subtarget.isPPC64();
6129   EVT PtrVT = getPointerTy(MF.getDataLayout());
6130 
  // Get the current return address save index.
6133   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6134   int RASI = FI->getReturnAddrSaveIndex();
6135 
  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
6141     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6142     // Save the result.
6143     FI->setReturnAddrSaveIndex(RASI);
6144   }
6145   return DAG.getFrameIndex(RASI, PtrVT);
6146 }
6147 
6148 SDValue
6149 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6150   MachineFunction &MF = DAG.getMachineFunction();
6151   bool isPPC64 = Subtarget.isPPC64();
6152   EVT PtrVT = getPointerTy(MF.getDataLayout());
6153 
6154   // Get current frame pointer save index.  The users of this index will be
6155   // primarily DYNALLOC instructions.
6156   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6157   int FPSI = FI->getFramePointerSaveIndex();
6158 
6159   // If the frame pointer save index hasn't been defined yet.
6160   if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
6164     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6165     // Save the result.
6166     FI->setFramePointerSaveIndex(FPSI);
6167   }
6168   return DAG.getFrameIndex(FPSI, PtrVT);
6169 }
6170 
6171 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6172                                                    SelectionDAG &DAG) const {
6173   // Get the inputs.
6174   SDValue Chain = Op.getOperand(0);
6175   SDValue Size  = Op.getOperand(1);
6176   SDLoc dl(Op);
6177 
  // Get the correct type for pointers.
6179   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6180   // Negate the size.
6181   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6182                                 DAG.getConstant(0, dl, PtrVT), Size);
6183   // Construct a node for the frame pointer save index.
6184   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6185   // Build a DYNALLOC node.
6186   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
6187   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
6188   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
6189 }
6190 
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
6193   MachineFunction &MF = DAG.getMachineFunction();
6194 
6195   bool isPPC64 = Subtarget.isPPC64();
6196   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6197 
6198   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
6199   return DAG.getFrameIndex(FI, PtrVT);
6200 }
6201 
6202 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
6203                                                SelectionDAG &DAG) const {
6204   SDLoc DL(Op);
6205   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
6206                      DAG.getVTList(MVT::i32, MVT::Other),
6207                      Op.getOperand(0), Op.getOperand(1));
6208 }
6209 
6210 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
6211                                                 SelectionDAG &DAG) const {
6212   SDLoc DL(Op);
6213   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
6214                      Op.getOperand(0), Op.getOperand(1));
6215 }
6216 
6217 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
6218   if (Op.getValueType().isVector())
6219     return LowerVectorLoad(Op, DAG);
6220 
6221   assert(Op.getValueType() == MVT::i1 &&
6222          "Custom lowering only for i1 loads");
6223 
6224   // First, load 8 bits into 32 bits, then truncate to 1 bit.
6225 
6226   SDLoc dl(Op);
6227   LoadSDNode *LD = cast<LoadSDNode>(Op);
6228 
6229   SDValue Chain = LD->getChain();
6230   SDValue BasePtr = LD->getBasePtr();
6231   MachineMemOperand *MMO = LD->getMemOperand();
6232 
6233   SDValue NewLD =
6234       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
6235                      BasePtr, MVT::i8, MMO);
6236   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
6237 
6238   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
6239   return DAG.getMergeValues(Ops, dl);
6240 }
6241 
6242 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
6243   if (Op.getOperand(1).getValueType().isVector())
6244     return LowerVectorStore(Op, DAG);
6245 
6246   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
6247          "Custom lowering only for i1 stores");
6248 
6249   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
6250 
6251   SDLoc dl(Op);
6252   StoreSDNode *ST = cast<StoreSDNode>(Op);
6253 
6254   SDValue Chain = ST->getChain();
6255   SDValue BasePtr = ST->getBasePtr();
6256   SDValue Value = ST->getValue();
6257   MachineMemOperand *MMO = ST->getMemOperand();
6258 
6259   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
6260                       Value);
6261   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
6262 }
6263 
6264 // FIXME: Remove this once the ANDI glue bug is fixed:
6265 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
6266   assert(Op.getValueType() == MVT::i1 &&
6267          "Custom lowering only for i1 results");
6268 
6269   SDLoc DL(Op);
6270   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
6271                      Op.getOperand(0));
6272 }
6273 
/// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
/// when possible.
6276 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not an fsel.
6278   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
6279       !Op.getOperand(2).getValueType().isFloatingPoint())
6280     return Op;
6281 
6282   // We might be able to do better than this under some circumstances, but in
6283   // general, fsel-based lowering of select is a finite-math-only optimization.
6284   // For more information, see section F.3 of the 2.06 ISA specification.
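  // As a concrete illustration of the transformation performed below,
  //   (select_cc lhs, rhs, tv, fv, setge)
  // becomes
  //   (fsel (fsub lhs, rhs), tv, fv)
  // since fsel picks tv when its first operand is >= 0.0 and fv otherwise;
  // the fsub may overflow or produce a NaN, hence the restriction above.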
6285   if (!DAG.getTarget().Options.NoInfsFPMath ||
6286       !DAG.getTarget().Options.NoNaNsFPMath)
6287     return Op;
6288   // TODO: Propagate flags from the select rather than global settings.
6289   SDNodeFlags Flags;
6290   Flags.setNoInfs(true);
6291   Flags.setNoNaNs(true);
6292 
6293   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
6294 
6295   EVT ResVT = Op.getValueType();
6296   EVT CmpVT = Op.getOperand(0).getValueType();
6297   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6298   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
6299   SDLoc dl(Op);
6300 
6301   // If the RHS of the comparison is a 0.0, we don't need to do the
6302   // subtraction at all.
6303   SDValue Sel1;
6304   if (isFloatingPointZero(RHS))
6305     switch (CC) {
6306     default: break;       // SETUO etc aren't handled by fsel.
6307     case ISD::SETNE:
6308       std::swap(TV, FV);
6309     case ISD::SETEQ:
6310       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6311         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6312       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6313       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
6314         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6315       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6316                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
6317     case ISD::SETULT:
6318     case ISD::SETLT:
6319       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
6320     case ISD::SETOGE:
6321     case ISD::SETGE:
6322       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6323         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6324       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6325     case ISD::SETUGT:
6326     case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
6328     case ISD::SETOLE:
6329     case ISD::SETLE:
6330       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6331         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6332       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6333                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
6334     }
6335 
6336   SDValue Cmp;
6337   switch (CC) {
6338   default: break;       // SETUO etc aren't handled by fsel.
6339   case ISD::SETNE:
6340     std::swap(TV, FV);
6341   case ISD::SETEQ:
6342     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
6343     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6344       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6345     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6346     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
6347       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6348     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6349                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
6350   case ISD::SETULT:
6351   case ISD::SETLT:
6352     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
6353     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6354       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6355     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6356   case ISD::SETOGE:
6357   case ISD::SETGE:
6358     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
6359     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6360       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6361     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6362   case ISD::SETUGT:
6363   case ISD::SETGT:
6364     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
6365     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6366       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6367     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6368   case ISD::SETOLE:
6369   case ISD::SETLE:
6370     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
6371     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6372       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6373     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6374   }
6375   return Op;
6376 }
6377 
6378 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
6379                                                SelectionDAG &DAG,
6380                                                const SDLoc &dl) const {
6381   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6382   SDValue Src = Op.getOperand(0);
6383   if (Src.getValueType() == MVT::f32)
6384     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6385 
6386   SDValue Tmp;
6387   switch (Op.getSimpleValueType().SimpleTy) {
6388   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6389   case MVT::i32:
6390     Tmp = DAG.getNode(
6391         Op.getOpcode() == ISD::FP_TO_SINT
6392             ? PPCISD::FCTIWZ
6393             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6394         dl, MVT::f64, Src);
6395     break;
6396   case MVT::i64:
6397     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6398            "i64 FP_TO_UINT is supported only with FPCVT");
6399     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6400                                                         PPCISD::FCTIDUZ,
6401                       dl, MVT::f64, Src);
6402     break;
6403   }
6404 
6405   // Convert the FP value to an int value through memory.
6406   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
6407     (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
6408   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
6409   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
6410   MachinePointerInfo MPI =
6411       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6412 
6413   // Emit a store to the stack slot.
6414   SDValue Chain;
6415   if (i32Stack) {
6416     MachineFunction &MF = DAG.getMachineFunction();
6417     MachineMemOperand *MMO =
6418       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
6419     SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
6420     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
6421               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
6422   } else
6423     Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
6424 
6425   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
6426   // add in a bias on big endian.
6427   if (Op.getValueType() == MVT::i32 && !i32Stack) {
6428     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
6429                         DAG.getConstant(4, dl, FIPtr.getValueType()));
6430     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
6431   }
6432 
6433   RLI.Chain = Chain;
6434   RLI.Ptr = FIPtr;
6435   RLI.MPI = MPI;
6436 }
6437 
6438 /// \brief Custom lowers floating point to integer conversions to use
6439 /// the direct move instructions available in ISA 2.07 to avoid the
6440 /// need for load/store combinations.
6441 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
6442                                                     SelectionDAG &DAG,
6443                                                     const SDLoc &dl) const {
6444   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6445   SDValue Src = Op.getOperand(0);
6446 
6447   if (Src.getValueType() == MVT::f32)
6448     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6449 
6450   SDValue Tmp;
6451   switch (Op.getSimpleValueType().SimpleTy) {
6452   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6453   case MVT::i32:
6454     Tmp = DAG.getNode(
6455         Op.getOpcode() == ISD::FP_TO_SINT
6456             ? PPCISD::FCTIWZ
6457             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6458         dl, MVT::f64, Src);
6459     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
6460     break;
6461   case MVT::i64:
6462     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6463            "i64 FP_TO_UINT is supported only with FPCVT");
6464     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6465                                                         PPCISD::FCTIDUZ,
6466                       dl, MVT::f64, Src);
6467     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
6468     break;
6469   }
6470   return Tmp;
6471 }
6472 
6473 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
6474                                           const SDLoc &dl) const {
6475   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6476     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
6477 
6478   ReuseLoadInfo RLI;
6479   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6480 
6481   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6482                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6483 }
6484 
6485 // We're trying to insert a regular store, S, and then a load, L. If the
6486 // incoming value, O, is a load, we might just be able to have our load use the
6487 // address used by O. However, we don't know if anything else will store to
6488 // that address before we can load from it. To prevent this situation, we need
6489 // to insert our load, L, into the chain as a peer of O. To do this, we give L
6490 // the same chain operand as O, we create a token factor from the chain results
6491 // of O and L, and we replace all uses of O's chain result with that token
6492 // factor (see spliceIntoChain below for this last part).
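// For example, if O is a load of @p and a later store to @p uses O's chain
// result as its input chain, then after the splice that store instead uses
// TokenFactor(O's chain result, L's chain result), so it stays ordered after
// both loads.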
6493 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
6494                                             ReuseLoadInfo &RLI,
6495                                             SelectionDAG &DAG,
6496                                             ISD::LoadExtType ET) const {
6497   SDLoc dl(Op);
6498   if (ET == ISD::NON_EXTLOAD &&
6499       (Op.getOpcode() == ISD::FP_TO_UINT ||
6500        Op.getOpcode() == ISD::FP_TO_SINT) &&
6501       isOperationLegalOrCustom(Op.getOpcode(),
6502                                Op.getOperand(0).getValueType())) {
6503 
6504     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6505     return true;
6506   }
6507 
6508   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
6509   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
6510       LD->isNonTemporal())
6511     return false;
6512   if (LD->getMemoryVT() != MemVT)
6513     return false;
6514 
6515   RLI.Ptr = LD->getBasePtr();
6516   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
6517     assert(LD->getAddressingMode() == ISD::PRE_INC &&
6518            "Non-pre-inc AM on PPC?");
6519     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
6520                           LD->getOffset());
6521   }
6522 
6523   RLI.Chain = LD->getChain();
6524   RLI.MPI = LD->getPointerInfo();
6525   RLI.IsDereferenceable = LD->isDereferenceable();
6526   RLI.IsInvariant = LD->isInvariant();
6527   RLI.Alignment = LD->getAlignment();
6528   RLI.AAInfo = LD->getAAInfo();
6529   RLI.Ranges = LD->getRanges();
6530 
6531   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
6532   return true;
6533 }
6534 
6535 // Given the head of the old chain, ResChain, insert a token factor containing
6536 // it and NewResChain, and make users of ResChain now be users of that token
6537 // factor.
6538 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
6539                                         SDValue NewResChain,
6540                                         SelectionDAG &DAG) const {
6541   if (!ResChain)
6542     return;
6543 
6544   SDLoc dl(NewResChain);
6545 
6546   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
6547                            NewResChain, DAG.getUNDEF(MVT::Other));
6548   assert(TF.getNode() != NewResChain.getNode() &&
6549          "A new TF really is required here");
6550 
6551   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
6552   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
6553 }
6554 
/// \brief Analyze the profitability of a direct move:
/// prefer a float load over an int load plus a direct move
/// when there is no integer use of the int load.
6558 static bool directMoveIsProfitable(const SDValue &Op) {
6559   SDNode *Origin = Op.getOperand(0).getNode();
6560   if (Origin->getOpcode() != ISD::LOAD)
6561     return true;
6562 
6563   for (SDNode::use_iterator UI = Origin->use_begin(),
6564                             UE = Origin->use_end();
6565        UI != UE; ++UI) {
6566 
6567     // Only look at the users of the loaded value.
6568     if (UI.getUse().get().getResNo() != 0)
6569       continue;
6570 
6571     if (UI->getOpcode() != ISD::SINT_TO_FP &&
6572         UI->getOpcode() != ISD::UINT_TO_FP)
6573       return true;
6574   }
6575 
6576   return false;
6577 }
6578 
6579 /// \brief Custom lowers integer to floating point conversions to use
6580 /// the direct move instructions available in ISA 2.07 to avoid the
6581 /// need for load/store combinations.
6582 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
6583                                                     SelectionDAG &DAG,
6584                                                     const SDLoc &dl) const {
6585   assert((Op.getValueType() == MVT::f32 ||
6586           Op.getValueType() == MVT::f64) &&
6587          "Invalid floating point type as target of conversion");
6588   assert(Subtarget.hasFPCVT() &&
6589          "Int to FP conversions with direct moves require FPCVT");
6590   SDValue FP;
6591   SDValue Src = Op.getOperand(0);
6592   bool SinglePrec = Op.getValueType() == MVT::f32;
6593   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
6594   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
6595   unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6596                              (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6597 
6598   if (WordInt) {
6599     FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6600                      dl, MVT::f64, Src);
6601     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6602   }
6603   else {
6604     FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6605     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6606   }
6607 
6608   return FP;
6609 }
6610 
6611 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
6612                                           SelectionDAG &DAG) const {
6613   SDLoc dl(Op);
6614 
6615   if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6616     if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6617       return SDValue();
6618 
6619     SDValue Value = Op.getOperand(0);
6620     // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by
    // 0.5). This can be done with an fma and the 0.5 constant:
    //   (V + 1.0) * 0.5 = 0.5*V + 0.5
6623     Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
6624 
6625     SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
6626 
6627     Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
6628 
6629     if (Op.getValueType() != MVT::v4f64)
6630       Value = DAG.getNode(ISD::FP_ROUND, dl,
6631                           Op.getValueType(), Value,
6632                           DAG.getIntPtrConstant(1, dl));
6633     return Value;
6634   }
6635 
6636   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
6637   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
6638     return SDValue();
6639 
6640   if (Op.getOperand(0).getValueType() == MVT::i1)
6641     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
6642                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
6643                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
6644 
  // If we have direct moves, we can do the conversion entirely in registers
  // and skip the store/load; however, without FPCVT we can't do most
  // conversions.
6647   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
6648       Subtarget.isPPC64() && Subtarget.hasFPCVT())
6649     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
6650 
6651   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
6652          "UINT_TO_FP is supported only with FPCVT");
6653 
6654   // If we have FCFIDS, then use it when converting to single-precision.
6655   // Otherwise, convert to double-precision and then round.
6656   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6657                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
6658                                                             : PPCISD::FCFIDS)
6659                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
6660                                                             : PPCISD::FCFID);
6661   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6662                   ? MVT::f32
6663                   : MVT::f64;
6664 
6665   if (Op.getOperand(0).getValueType() == MVT::i64) {
6666     SDValue SINT = Op.getOperand(0);
6667     // When converting to single-precision, we actually need to convert
6668     // to double-precision first and then round to single-precision.
6669     // To avoid double-rounding effects during that operation, we have
6670     // to prepare the input operand.  Bits that might be truncated when
6671     // converting to double-precision are replaced by a bit that won't
6672     // be lost at this stage, but is below the single-precision rounding
6673     // position.
6674     //
6675     // However, if -enable-unsafe-fp-math is in effect, accept double
6676     // rounding to avoid the extra overhead.
6677     if (Op.getValueType() == MVT::f32 &&
6678         !Subtarget.hasFPCVT() &&
6679         !DAG.getTarget().Options.UnsafeFPMath) {
6680 
6681       // Twiddle input to make sure the low 11 bits are zero.  (If this
6682       // is the case, we are guaranteed the value will fit into the 53 bit
6683       // mantissa of an IEEE double-precision value without rounding.)
6684       // If any of those low 11 bits were not zero originally, make sure
6685       // bit 12 (value 2048) is set instead, so that the final rounding
6686       // to single-precision gets the correct result.
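      // For example, if the low 11 bits of SINT are 0x003, then
      // 0x003 + 2047 = 0x802 carries into bit 12 (value 2048); OR'ing with
      // SINT and clearing the low 11 bits leaves that bit set, so the final
      // rounding to single precision still sees that the discarded bits were
      // nonzero.  A value whose low 11 bits are already zero passes through
      // unchanged, since 0 + 2047 never reaches bit 12.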
6687       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6688                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
6689       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
6690                           Round, DAG.getConstant(2047, dl, MVT::i64));
6691       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
6692       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6693                           Round, DAG.getConstant(-2048, dl, MVT::i64));
6694 
6695       // However, we cannot use that value unconditionally: if the magnitude
6696       // of the input value is small, the bit-twiddling we did above might
6697       // end up visibly changing the output.  Fortunately, in that case, we
6698       // don't need to twiddle bits since the original input will convert
6699       // exactly to double-precision floating-point already.  Therefore,
6700       // construct a conditional to use the original value if the top 11
6701       // bits are all sign-bit copies, and use the rounded value computed
6702       // above otherwise.
6703       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
6704                                  SINT, DAG.getConstant(53, dl, MVT::i32));
6705       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
6706                          Cond, DAG.getConstant(1, dl, MVT::i64));
6707       Cond = DAG.getSetCC(dl, MVT::i32,
6708                           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
6709 
6710       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
6711     }
6712 
6713     ReuseLoadInfo RLI;
6714     SDValue Bits;
6715 
6716     MachineFunction &MF = DAG.getMachineFunction();
6717     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
6718       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6719                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6720       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6721     } else if (Subtarget.hasLFIWAX() &&
6722                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
6723       MachineMemOperand *MMO =
6724         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6725                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6726       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6727       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
6728                                      DAG.getVTList(MVT::f64, MVT::Other),
6729                                      Ops, MVT::i32, MMO);
6730       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6731     } else if (Subtarget.hasFPCVT() &&
6732                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
6733       MachineMemOperand *MMO =
6734         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6735                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6736       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6737       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
6738                                      DAG.getVTList(MVT::f64, MVT::Other),
6739                                      Ops, MVT::i32, MMO);
6740       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6741     } else if (((Subtarget.hasLFIWAX() &&
6742                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
6743                 (Subtarget.hasFPCVT() &&
6744                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
6745                SINT.getOperand(0).getValueType() == MVT::i32) {
6746       MachineFrameInfo &MFI = MF.getFrameInfo();
6747       EVT PtrVT = getPointerTy(DAG.getDataLayout());
6748 
6749       int FrameIdx = MFI.CreateStackObject(4, 4, false);
6750       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6751 
6752       SDValue Store =
6753           DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
6754                        MachinePointerInfo::getFixedStack(
6755                            DAG.getMachineFunction(), FrameIdx));
6756 
6757       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6758              "Expected an i32 store");
6759 
6760       RLI.Ptr = FIdx;
6761       RLI.Chain = Store;
6762       RLI.MPI =
6763           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
6764       RLI.Alignment = 4;
6765 
6766       MachineMemOperand *MMO =
6767         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6768                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6769       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6770       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
6771                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
6772                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
6773                                      Ops, MVT::i32, MMO);
6774     } else
6775       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
6776 
6777     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
6778 
6779     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6780       FP = DAG.getNode(ISD::FP_ROUND, dl,
6781                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
6782     return FP;
6783   }
6784 
6785   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
6786          "Unhandled INT_TO_FP type in custom expander!");
6787   // Since we only generate this in 64-bit mode, we can take advantage of
6788   // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value onto the stack,
  // then lfd it and fcfid it.
6791   MachineFunction &MF = DAG.getMachineFunction();
6792   MachineFrameInfo &MFI = MF.getFrameInfo();
6793   EVT PtrVT = getPointerTy(MF.getDataLayout());
6794 
6795   SDValue Ld;
6796   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
6797     ReuseLoadInfo RLI;
6798     bool ReusingLoad;
6799     if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
6800                                             DAG))) {
6801       int FrameIdx = MFI.CreateStackObject(4, 4, false);
6802       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6803 
6804       SDValue Store =
6805           DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
6806                        MachinePointerInfo::getFixedStack(
6807                            DAG.getMachineFunction(), FrameIdx));
6808 
6809       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6810              "Expected an i32 store");
6811 
6812       RLI.Ptr = FIdx;
6813       RLI.Chain = Store;
6814       RLI.MPI =
6815           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
6816       RLI.Alignment = 4;
6817     }
6818 
6819     MachineMemOperand *MMO =
6820       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6821                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6822     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6823     Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
6824                                    PPCISD::LFIWZX : PPCISD::LFIWAX,
6825                                  dl, DAG.getVTList(MVT::f64, MVT::Other),
6826                                  Ops, MVT::i32, MMO);
6827     if (ReusingLoad)
6828       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
6829   } else {
6830     assert(Subtarget.isPPC64() &&
6831            "i32->FP without LFIWAX supported only on PPC64");
6832 
6833     int FrameIdx = MFI.CreateStackObject(8, 8, false);
6834     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6835 
6836     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
6837                                 Op.getOperand(0));
6838 
6839     // STD the extended value into the stack slot.
6840     SDValue Store = DAG.getStore(
6841         DAG.getEntryNode(), dl, Ext64, FIdx,
6842         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
6843 
6844     // Load the value as a double.
6845     Ld = DAG.getLoad(
6846         MVT::f64, dl, Store, FIdx,
6847         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
6848   }
6849 
6850   // FCFID it and return it.
6851   SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
6852   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6853     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
6854                      DAG.getIntPtrConstant(0, dl));
6855   return FP;
6856 }
6857 
6858 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
6859                                             SelectionDAG &DAG) const {
6860   SDLoc dl(Op);
6861   /*
   The rounding mode is in bits 30:31 of the FPSCR, and has the following
6863    settings:
6864      00 Round to nearest
6865      01 Round to 0
6866      10 Round to +inf
6867      11 Round to -inf
6868 
6869   FLT_ROUNDS, on the other hand, expects the following:
6870     -1 Undefined
6871      0 Round to 0
6872      1 Round to nearest
6873      2 Round to +inf
6874      3 Round to -inf
6875 
6876   To perform the conversion, we do:
6877     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
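
  which maps each FPSCR encoding to the expected FLT_ROUNDS value:
    00 -> (0 ^ (3 >> 1)) = 1   (round to nearest)
    01 -> (1 ^ (2 >> 1)) = 0   (round to 0)
    10 -> (2 ^ (1 >> 1)) = 2   (round to +inf)
    11 -> (3 ^ (0 >> 1)) = 3   (round to -inf)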
6878   */
6879 
6880   MachineFunction &MF = DAG.getMachineFunction();
6881   EVT VT = Op.getValueType();
6882   EVT PtrVT = getPointerTy(MF.getDataLayout());
6883 
6884   // Save FP Control Word to register
6885   EVT NodeTys[] = {
6886     MVT::f64,    // return register
6887     MVT::Glue    // unused in this context
6888   };
6889   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
6890 
6891   // Save FP register to stack slot
6892   int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
6893   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
6894   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
6895                                MachinePointerInfo());
6896 
6897   // Load FP Control Word from low 32 bits of stack slot.
6898   SDValue Four = DAG.getConstant(4, dl, PtrVT);
6899   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
6900   SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
6901 
6902   // Transform as necessary
6903   SDValue CWD1 =
6904     DAG.getNode(ISD::AND, dl, MVT::i32,
6905                 CWD, DAG.getConstant(3, dl, MVT::i32));
6906   SDValue CWD2 =
6907     DAG.getNode(ISD::SRL, dl, MVT::i32,
6908                 DAG.getNode(ISD::AND, dl, MVT::i32,
6909                             DAG.getNode(ISD::XOR, dl, MVT::i32,
6910                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
6911                             DAG.getConstant(3, dl, MVT::i32)),
6912                 DAG.getConstant(1, dl, MVT::i32));
6913 
6914   SDValue RetVal =
6915     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
6916 
6917   return DAG.getNode((VT.getSizeInBits() < 16 ?
6918                       ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
6919 }
6920 
6921 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6922   EVT VT = Op.getValueType();
6923   unsigned BitWidth = VT.getSizeInBits();
6924   SDLoc dl(Op);
6925   assert(Op.getNumOperands() == 3 &&
6926          VT == Op.getOperand(1).getValueType() &&
6927          "Unexpected SHL!");
6928 
6929   // Expand into a bunch of logical ops.  Note that these ops
6930   // depend on the PPC behavior for oversized shift amounts.
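  // (PPC shifts by an amount in [BitWidth, 2*BitWidth) yield zero, so any
  // term below whose effective amount falls outside [0, BitWidth) simply
  // drops out.  For instance, with 32-bit halves and Amt = 40, Hi << 40 and
  // Lo >> (32 - 40) are both zero, leaving OutHi = Lo << (40 - 32) = Lo << 8,
  // as required.)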
6931   SDValue Lo = Op.getOperand(0);
6932   SDValue Hi = Op.getOperand(1);
6933   SDValue Amt = Op.getOperand(2);
6934   EVT AmtVT = Amt.getValueType();
6935 
6936   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6937                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6938   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
6939   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
6940   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
6941   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6942                              DAG.getConstant(-BitWidth, dl, AmtVT));
6943   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
6944   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6945   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
6946   SDValue OutOps[] = { OutLo, OutHi };
6947   return DAG.getMergeValues(OutOps, dl);
6948 }
6949 
6950 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6951   EVT VT = Op.getValueType();
6952   SDLoc dl(Op);
6953   unsigned BitWidth = VT.getSizeInBits();
6954   assert(Op.getNumOperands() == 3 &&
6955          VT == Op.getOperand(1).getValueType() &&
6956          "Unexpected SRL!");
6957 
6958   // Expand into a bunch of logical ops.  Note that these ops
6959   // depend on the PPC behavior for oversized shift amounts.
6960   SDValue Lo = Op.getOperand(0);
6961   SDValue Hi = Op.getOperand(1);
6962   SDValue Amt = Op.getOperand(2);
6963   EVT AmtVT = Amt.getValueType();
6964 
6965   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6966                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6967   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
6968   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
6969   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
6970   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6971                              DAG.getConstant(-BitWidth, dl, AmtVT));
6972   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
6973   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6974   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
6975   SDValue OutOps[] = { OutLo, OutHi };
6976   return DAG.getMergeValues(OutOps, dl);
6977 }
6978 
6979 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
6980   SDLoc dl(Op);
6981   EVT VT = Op.getValueType();
6982   unsigned BitWidth = VT.getSizeInBits();
6983   assert(Op.getNumOperands() == 3 &&
6984          VT == Op.getOperand(1).getValueType() &&
6985          "Unexpected SRA!");
6986 
6987   // Expand into a bunch of logical ops, followed by a select_cc.
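  // The OR trick used for SRL does not work for the low half here: when Amt
  // is below the register width, (Hi sra (Amt - BitWidth)) is not zero but a
  // full word of sign bits, so the low result is chosen with a select_cc on
  // whether Amt - BitWidth is positive instead.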
6988   SDValue Lo = Op.getOperand(0);
6989   SDValue Hi = Op.getOperand(1);
6990   SDValue Amt = Op.getOperand(2);
6991   EVT AmtVT = Amt.getValueType();
6992 
6993   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6994                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6995   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
6996   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
6997   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
6998   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6999                              DAG.getConstant(-BitWidth, dl, AmtVT));
7000   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
7001   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
7002   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
7003                                   Tmp4, Tmp6, ISD::SETLE);
7004   SDValue OutOps[] = { OutLo, OutHi };
7005   return DAG.getMergeValues(OutOps, dl);
7006 }
7007 
7008 //===----------------------------------------------------------------------===//
7009 // Vector related lowering.
7010 //
7011 
7012 /// BuildSplatI - Build a canonical splati of Val with an element size of
7013 /// SplatSize.  Cast the result to VT.
7014 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
7015                            SelectionDAG &DAG, const SDLoc &dl) {
7016   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
7017 
7018   static const MVT VTys[] = { // canonical VT to use for each size.
7019     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
7020   };
7021 
7022   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
7023 
7024   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
7025   if (Val == -1)
7026     SplatSize = 1;
7027 
7028   EVT CanonicalVT = VTys[SplatSize-1];
7029 
7030   // Build a canonical splat for this value.
7031   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
7032 }
7033 
7034 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
7035 /// specified intrinsic ID.
7036 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
7037                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
7038   if (DestVT == MVT::Other) DestVT = Op.getValueType();
7039   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7040                      DAG.getConstant(IID, dl, MVT::i32), Op);
7041 }
7042 
7043 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
7044 /// specified intrinsic ID.
7045 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
7046                                 SelectionDAG &DAG, const SDLoc &dl,
7047                                 EVT DestVT = MVT::Other) {
7048   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
7049   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7050                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
7051 }
7052 
7053 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
7054 /// specified intrinsic ID.
7055 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
7056                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
7057                                 EVT DestVT = MVT::Other) {
7058   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
7059   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7060                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
7061 }
7062 
7063 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
7064 /// amount.  The result has the specified value type.
7065 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
7066                            SelectionDAG &DAG, const SDLoc &dl) {
7067   // Force LHS/RHS to be the right type.
7068   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
7069   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
7070 
7071   int Ops[16];
7072   for (unsigned i = 0; i != 16; ++i)
7073     Ops[i] = i + Amt;
7074   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
7075   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7076 }
7077 
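// Return true if BVN is a non-constant build_vector of the given type in
// which every operand is identical to the first, i.e. a splat of a variable
// value.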
7078 static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
7079   if (BVN->isConstant() || BVN->getValueType(0) != Type)
7080     return false;
7081   auto OpZero = BVN->getOperand(0);
7082   for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
7083     if (BVN->getOperand(i) != OpZero)
7084       return false;
7085   return true;
7086 }
7087 
7088 // If this is a case we can't handle, return null and let the default
7089 // expansion code take care of it.  If we CAN select this case, and if it
7090 // selects to a single instruction, return Op.  Otherwise, if we can codegen
7091 // this case more efficiently than a constant pool load, lower it to the
7092 // sequence of ops that should be used.
7093 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
7094                                              SelectionDAG &DAG) const {
7095   SDLoc dl(Op);
7096   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7097   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
7098 
7099   if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
7100     // We first build an i32 vector, load it into a QPX register,
7101     // then convert it to a floating-point vector and compare it
7102     // to a zero vector to get the boolean result.
7103     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7104     int FrameIdx = MFI.CreateStackObject(16, 16, false);
7105     MachinePointerInfo PtrInfo =
7106         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7107     EVT PtrVT = getPointerTy(DAG.getDataLayout());
7108     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7109 
7110     assert(BVN->getNumOperands() == 4 &&
7111       "BUILD_VECTOR for v4i1 does not have 4 operands");
7112 
7113     bool IsConst = true;
7114     for (unsigned i = 0; i < 4; ++i) {
7115       if (BVN->getOperand(i).isUndef()) continue;
7116       if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
7117         IsConst = false;
7118         break;
7119       }
7120     }
7121 
7122     if (IsConst) {
7123       Constant *One =
7124         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
7125       Constant *NegOne =
7126         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
7127 
7128       Constant *CV[4];
7129       for (unsigned i = 0; i < 4; ++i) {
7130         if (BVN->getOperand(i).isUndef())
7131           CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
7132         else if (isNullConstant(BVN->getOperand(i)))
7133           CV[i] = NegOne;
7134         else
7135           CV[i] = One;
7136       }
7137 
7138       Constant *CP = ConstantVector::get(CV);
7139       SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
7140                                           16 /* alignment */);
7141 
7142       SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
7143       SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
7144       return DAG.getMemIntrinsicNode(
7145           PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
7146           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7147     }
7148 
7149     SmallVector<SDValue, 4> Stores;
7150     for (unsigned i = 0; i < 4; ++i) {
7151       if (BVN->getOperand(i).isUndef()) continue;
7152 
7153       unsigned Offset = 4*i;
7154       SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7155       Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7156 
7157       unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
7158       if (StoreSize > 4) {
7159         Stores.push_back(
7160             DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
7161                               PtrInfo.getWithOffset(Offset), MVT::i32));
7162       } else {
7163         SDValue StoreValue = BVN->getOperand(i);
7164         if (StoreSize < 4)
7165           StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
7166 
7167         Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
7168                                       PtrInfo.getWithOffset(Offset)));
7169       }
7170     }
7171 
7172     SDValue StoreChain;
7173     if (!Stores.empty())
7174       StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7175     else
7176       StoreChain = DAG.getEntryNode();
7177 
7178     // Now load from v4i32 into the QPX register; this will extend it to
7179     // v4i64 but not yet convert it to a floating point. Nevertheless, this
7180     // is typed as v4f64 because the QPX register integer states are not
7181     // explicitly represented.
7182 
7183     SDValue Ops[] = {StoreChain,
7184                      DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
7185                      FIdx};
7186     SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
7187 
7188     SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
7189       dl, VTs, Ops, MVT::v4i32, PtrInfo);
7190     LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7191       DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
7192       LoadedVect);
7193 
7194     SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
7195 
7196     return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
7197   }
7198 
7199   // All other QPX vectors are handled by generic code.
7200   if (Subtarget.hasQPX())
7201     return SDValue();
7202 
7203   // Check if this is a splat of a constant value.
7204   APInt APSplatBits, APSplatUndef;
7205   unsigned SplatBitSize;
7206   bool HasAnyUndefs;
7207   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
7208                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
7209       SplatBitSize > 32) {
    // We can splat a non-const value on CPUs that implement ISA 3.0
    // in two ways: LXVWSX (load and splat) and MTVSRWS (move and splat).
7212     auto OpZero = BVN->getOperand(0);
7213     bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
7214       BVN->isOnlyUserOf(OpZero.getNode());
7215     if (Subtarget.isISA3_0() && !CanLoadAndSplat &&
7216         (isNonConstSplatBV(BVN, MVT::v4i32) ||
7217          isNonConstSplatBV(BVN, MVT::v2i64)))
7218       return Op;
7219     return SDValue();
7220   }
7221 
7222   unsigned SplatBits = APSplatBits.getZExtValue();
7223   unsigned SplatUndef = APSplatUndef.getZExtValue();
7224   unsigned SplatSize = SplatBitSize / 8;
7225 
7226   // First, handle single instruction cases.
7227 
7228   // All zeros?
7229   if (SplatBits == 0) {
7230     // Canonicalize all zero vectors to be v4i32.
7231     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
7232       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
7233       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
7234     }
7235     return Op;
7236   }
7237 
  // We have XXSPLTIB for constant splats one byte wide.
7239   if (Subtarget.isISA3_0() && Op.getValueType() == MVT::v16i8)
7240     return Op;
7241 
7242   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
7243   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
7244                     (32-SplatBitSize));
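  // (For example, a v16i8 splat of 0xF0 has SplatBits = 0xF0 and
  // SplatBitSize = 8, giving SextVal = -16, which the check below handles
  // with a single vspltisb -16.)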
7245   if (SextVal >= -16 && SextVal <= 15)
7246     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
7247 
7248   // Two instruction sequences.
7249 
7250   // If this value is in the range [-32,30] and is even, use:
7251   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
7252   // If this value is in the range [17,31] and is odd, use:
7253   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
7254   // If this value is in the range [-31,-17] and is odd, use:
7255   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
7256   // Note the last two are three-instruction sequences.
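  // For example, a splat of 22 becomes VSPLTI(11) + VSPLTI(11), a splat of
  // 27 becomes VSPLTI(11) - VSPLTI(-16), and a splat of -23 becomes
  // VSPLTI(-7) + VSPLTI(-16).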
7257   if (SextVal >= -32 && SextVal <= 31) {
7258     // To avoid having these optimizations undone by constant folding,
7259     // we convert to a pseudo that will be expanded later into one of
7260     // the above forms.
7261     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
7262     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
7263               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
7264     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
7265     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
7266     if (VT == Op.getValueType())
7267       return RetVal;
7268     else
7269       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
7270   }
7271 
7272   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
7273   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
7274   // for fneg/fabs.
7275   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
7276     // Make -1 and vspltisw -1:
7277     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
7278 
7279     // Make the VSLW intrinsic, computing 0x8000_0000.
7280     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
7281                                    OnesV, DAG, dl);
7282 
7283     // xor by OnesV to invert it.
7284     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
7285     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7286   }
7287 
7288   // Check to see if this is a wide variety of vsplti*, binop self cases.
7289   static const signed char SplatCsts[] = {
7290     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
7291     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
7292   };
7293 
7294   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).
7297     int i = SplatCsts[idx];
7298 
7299     // Figure out what shift amount will be used by altivec if shifted by i in
7300     // this splat size.
7301     unsigned TypeShiftAmt = i & (SplatBitSize-1);
7302 
7303     // vsplti + shl self.
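    // (For example, a v16i8 splat of 24 can be built as vspltisb 3 followed
    // by vslb of the result with itself, since each byte is then shifted
    // left by 3: 3 << 3 == 24.)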
7304     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
7305       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7306       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7307         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
7308         Intrinsic::ppc_altivec_vslw
7309       };
7310       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7311       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7312     }
7313 
7314     // vsplti + srl self.
7315     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7316       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7317       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7318         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
7319         Intrinsic::ppc_altivec_vsrw
7320       };
7321       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7322       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7323     }
7324 
7325     // vsplti + sra self.
7326     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7327       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7328       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7329         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
7330         Intrinsic::ppc_altivec_vsraw
7331       };
7332       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7333       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7334     }
7335 
7336     // vsplti + rol self.
7337     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7338                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
7339       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7340       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7341         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
7342         Intrinsic::ppc_altivec_vrlw
7343       };
7344       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7345       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7346     }
7347 
7348     // t = vsplti c, result = vsldoi t, t, 1
7349     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7350       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7351       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7352       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7353     }
7354     // t = vsplti c, result = vsldoi t, t, 2
7355     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7356       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7357       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7358       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7359     }
7360     // t = vsplti c, result = vsldoi t, t, 3
7361     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
7362       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7363       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
7364       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7365     }
7366   }
7367 
7368   return SDValue();
7369 }
7370 
7371 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7372 /// the specified operations to build the shuffle.
7373 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7374                                       SDValue RHS, SelectionDAG &DAG,
7375                                       const SDLoc &dl) {
7376   unsigned OpNum = (PFEntry >> 26) & 0x0F;
7377   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7378   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
7379 
7380   enum {
7381     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7382     OP_VMRGHW,
7383     OP_VMRGLW,
7384     OP_VSPLTISW0,
7385     OP_VSPLTISW1,
7386     OP_VSPLTISW2,
7387     OP_VSPLTISW3,
7388     OP_VSLDOI4,
7389     OP_VSLDOI8,
7390     OP_VSLDOI12
7391   };
7392 
7393   if (OpNum == OP_COPY) {
7394     if (LHSID == (1*9+2)*9+3) return LHS;
7395     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7396     return RHS;
7397   }
7398 
7399   SDValue OpLHS, OpRHS;
7400   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7401   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7402 
7403   int ShufIdxs[16];
7404   switch (OpNum) {
7405   default: llvm_unreachable("Unknown i32 permute!");
7406   case OP_VMRGHW:
7407     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
7408     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
7409     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
7410     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
7411     break;
7412   case OP_VMRGLW:
7413     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
7414     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
7415     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
7416     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
7417     break;
7418   case OP_VSPLTISW0:
7419     for (unsigned i = 0; i != 16; ++i)
7420       ShufIdxs[i] = (i&3)+0;
7421     break;
7422   case OP_VSPLTISW1:
7423     for (unsigned i = 0; i != 16; ++i)
7424       ShufIdxs[i] = (i&3)+4;
7425     break;
7426   case OP_VSPLTISW2:
7427     for (unsigned i = 0; i != 16; ++i)
7428       ShufIdxs[i] = (i&3)+8;
7429     break;
7430   case OP_VSPLTISW3:
7431     for (unsigned i = 0; i != 16; ++i)
7432       ShufIdxs[i] = (i&3)+12;
7433     break;
7434   case OP_VSLDOI4:
7435     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
7436   case OP_VSLDOI8:
7437     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
7438   case OP_VSLDOI12:
7439     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
7440   }
7441   EVT VT = OpLHS.getValueType();
7442   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
7443   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
7444   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
7445   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7446 }
7447 
7448 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
7449 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
7450 /// return the code it can be lowered into.  Worst case, it can always be
7451 /// lowered into a vperm.
7452 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
7453                                                SelectionDAG &DAG) const {
7454   SDLoc dl(Op);
7455   SDValue V1 = Op.getOperand(0);
7456   SDValue V2 = Op.getOperand(1);
7457   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7458   EVT VT = Op.getValueType();
7459   bool isLittleEndian = Subtarget.isLittleEndian();
7460 
7461   unsigned ShiftElts, InsertAtByte;
7462   bool Swap;
7463   if (Subtarget.hasP9Vector() &&
7464       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
7465                            isLittleEndian)) {
7466     if (Swap)
7467       std::swap(V1, V2);
7468     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7469     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
7470     if (ShiftElts) {
7471       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
7472                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
7473       SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
7474                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
7475       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7476     }
7477     SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
7478                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
7479     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7480   }
7481 
7482   if (Subtarget.hasVSX()) {
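    // A splat of a single 4-byte element can be lowered directly to an
    // XXSPLT node; if the splatted element comes from a 32-bit load on an
    // ISA 3.0 target, the load itself already produces the splat (LXVWSX)
    // and V1 can be returned unchanged.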
7483     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
7484       int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
7485 
7486       // If the source for the shuffle is a scalar_to_vector that came from a
7487       // 32-bit load, it will have used LXVWSX so we don't need to splat again.
7488       if (Subtarget.isISA3_0() &&
7489           ((isLittleEndian && SplatIdx == 3) ||
7490            (!isLittleEndian && SplatIdx == 0))) {
7491         SDValue Src = V1.getOperand(0);
7492         if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
7493             Src.getOperand(0).getOpcode() == ISD::LOAD &&
7494             Src.getOperand(0).hasOneUse())
7495           return V1;
7496       }
7497       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7498       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
7499                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
7500       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
7501     }
7502 
7503     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
7504     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
7505       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
7506       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
7507       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
7508     }
7509 
7510   }
7511 
7512   if (Subtarget.hasQPX()) {
7513     if (VT.getVectorNumElements() != 4)
7514       return SDValue();
7515 
7516     if (V2.isUndef()) V2 = V1;
7517 
7518     int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
7519     if (AlignIdx != -1) {
7520       return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
7521                          DAG.getConstant(AlignIdx, dl, MVT::i32));
7522     } else if (SVOp->isSplat()) {
7523       int SplatIdx = SVOp->getSplatIndex();
7524       if (SplatIdx >= 4) {
7525         std::swap(V1, V2);
7526         SplatIdx -= 4;
7527       }
7528 
7529       return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
7530                          DAG.getConstant(SplatIdx, dl, MVT::i32));
7531     }
7532 
7533     // Lower this into a qvgpci/qvfperm pair.
7534 
7535     // Compute the qvgpci literal
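    // Each of the four selectors (mask element 0-7, or the element's own
    // index when the mask entry is undef) occupies 3 bits of the literal,
    // with selector 0 in the most significant position.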
7536     unsigned idx = 0;
7537     for (unsigned i = 0; i < 4; ++i) {
7538       int m = SVOp->getMaskElt(i);
7539       unsigned mm = m >= 0 ? (unsigned) m : i;
7540       idx |= mm << (3-i)*3;
7541     }
7542 
7543     SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
7544                              DAG.getConstant(idx, dl, MVT::i32));
7545     return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
7546   }
7547 
7548   // Cases that are handled by instructions that take permute immediates
7549   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
7550   // selected by the instruction selector.
7551   if (V2.isUndef()) {
7552     if (PPC::isSplatShuffleMask(SVOp, 1) ||
7553         PPC::isSplatShuffleMask(SVOp, 2) ||
7554         PPC::isSplatShuffleMask(SVOp, 4) ||
7555         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
7556         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
7557         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
7558         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
7559         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
7560         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
7561         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
7562         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
7563         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
7564         (Subtarget.hasP8Altivec() && (
7565          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
7566          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
7567          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
7568       return Op;
7569     }
7570   }
7571 
7572   // Altivec has a variety of "shuffle immediates" that take two vector inputs
7573   // and produce a fixed permutation.  If any of these match, do not lower to
7574   // VPERM.
7575   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
7576   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7577       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7578       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
7579       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7580       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7581       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7582       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7583       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7584       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7585       (Subtarget.hasP8Altivec() && (
7586        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7587        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
7588        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
7589     return Op;
7590 
7591   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
7592   // perfect shuffle table to emit an optimal matching sequence.
7593   ArrayRef<int> PermMask = SVOp->getMask();
7594 
7595   unsigned PFIndexes[4];
7596   bool isFourElementShuffle = true;
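  // PFIndexes[i] records which 4-byte source element supplies result element
  // i: values 0-7 index into the concatenation of the two inputs, and 8 means
  // the result element is undef.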
7597   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
7598     unsigned EltNo = 8;   // Start out undef.
7599     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
7600       if (PermMask[i*4+j] < 0)
7601         continue;   // Undef, ignore it.
7602 
7603       unsigned ByteSource = PermMask[i*4+j];
7604       if ((ByteSource & 3) != j) {
7605         isFourElementShuffle = false;
7606         break;
7607       }
7608 
7609       if (EltNo == 8) {
7610         EltNo = ByteSource/4;
7611       } else if (EltNo != ByteSource/4) {
7612         isFourElementShuffle = false;
7613         break;
7614       }
7615     }
7616     PFIndexes[i] = EltNo;
7617   }
7618 
7619   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
7620   // perfect shuffle vector to determine if it is cost effective to do this as
7621   // discrete instructions, or whether we should use a vperm.
7622   // For now, we skip this for little endian until such time as we have a
7623   // little-endian perfect shuffle table.
7624   if (isFourElementShuffle && !isLittleEndian) {
7625     // Compute the index in the perfect shuffle table.
7626     unsigned PFTableIndex =
7627       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
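    // The table is indexed as a base-9 number, since each of the four
    // selectors can take nine values (source elements 0-7, or undef).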
7628 
7629     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7630     unsigned Cost  = (PFEntry >> 30);
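    // The top two bits of a table entry encode its cost; the remaining bits
    // describe the operation and operands GeneratePerfectShuffle uses to
    // build the shuffle.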
7631 
7632     // Determining when to avoid vperm is tricky.  Many things affect the cost
7633     // of vperm, particularly how many times the perm mask needs to be computed.
7634     // For example, if the perm mask can be hoisted out of a loop or is already
7635     // used (perhaps because there are multiple permutes with the same shuffle
7636     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
7637     // the loop requires an extra register.
7638     //
7639     // As a compromise, we only emit discrete instructions if the shuffle can be
7640     // generated in 3 or fewer operations.  When we have loop information
7641     // available, if this block is within a loop, we should avoid using vperm
7642     // for 3-operation perms and use a constant pool load instead.
7643     if (Cost < 3)
7644       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7645   }
7646 
7647   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
7648   // vector that will get spilled to the constant pool.
7649   if (V2.isUndef()) V2 = V1;
7650 
7651   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
7652   // that it is in input element units, not in bytes.  Convert now.
7653 
7654   // For little endian, the order of the input vectors is reversed, and
7655   // the permutation mask is complemented with respect to 31.  This is
7656   // necessary to produce proper semantics with the big-endian-biased vperm
7657   // instruction.
7658   EVT EltVT = V1.getValueType().getVectorElementType();
7659   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
7660 
7661   SmallVector<SDValue, 16> ResultMask;
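  // Expand each element index into BytesPerElement byte indices: byte j of
  // source element SrcElt becomes SrcElt*BytesPerElement + j, or 31 minus
  // that value on little-endian targets.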
7662   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
7663     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
7664 
7665     for (unsigned j = 0; j != BytesPerElement; ++j)
7666       if (isLittleEndian)
7667         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
7668                                              dl, MVT::i32));
7669       else
7670         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
7671                                              MVT::i32));
7672   }
7673 
7674   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
7675   if (isLittleEndian)
7676     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7677                        V2, V1, VPermMask);
7678   else
7679     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7680                        V1, V2, VPermMask);
7681 }
7682 
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison.  If it is, return true and fill in CompareOpc/isDot
/// with information about the intrinsic.
7686 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
7687                                  bool &isDot, const PPCSubtarget &Subtarget) {
7688   unsigned IntrinsicID =
7689     cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
7690   CompareOpc = -1;
7691   isDot = false;
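  // Each CompareOpc value below selects a specific vcmp*/xvcmp* instruction;
  // LowerINTRINSIC_WO_CHAIN passes it through unchanged as an immediate
  // operand on the VCMP/VCMPo node it builds.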
7692   switch (IntrinsicID) {
7693   default: return false;
7694     // Comparison predicates.
7695   case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
7696   case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
7697   case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
7698   case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
7699   case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
7700   case Intrinsic::ppc_altivec_vcmpequd_p:
7701     if (Subtarget.hasP8Altivec()) {
7702       CompareOpc = 199;
7703       isDot = 1;
7704     } else
7705       return false;
7706 
7707     break;
7708   case Intrinsic::ppc_altivec_vcmpneb_p:
7709   case Intrinsic::ppc_altivec_vcmpneh_p:
7710   case Intrinsic::ppc_altivec_vcmpnew_p:
7711   case Intrinsic::ppc_altivec_vcmpnezb_p:
7712   case Intrinsic::ppc_altivec_vcmpnezh_p:
7713   case Intrinsic::ppc_altivec_vcmpnezw_p:
7714     if (Subtarget.hasP9Altivec()) {
7715       switch(IntrinsicID) {
7716       default: llvm_unreachable("Unknown comparison intrinsic.");
7717       case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
7718       case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
7719       case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
7720       case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
7721       case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
7722       case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
7723       }
7724       isDot = 1;
7725     } else
7726       return false;
7727 
7728     break;
7729   case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
7730   case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
7731   case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
7732   case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
7733   case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
7734   case Intrinsic::ppc_altivec_vcmpgtsd_p:
7735     if (Subtarget.hasP8Altivec()) {
7736       CompareOpc = 967;
7737       isDot = 1;
7738     } else
7739       return false;
7740 
7741     break;
7742   case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
7743   case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
7744   case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
7745   case Intrinsic::ppc_altivec_vcmpgtud_p:
7746     if (Subtarget.hasP8Altivec()) {
7747       CompareOpc = 711;
7748       isDot = 1;
7749     } else
7750       return false;
7751 
7752     break;
7753     // VSX predicate comparisons use the same infrastructure
7754   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
7755   case Intrinsic::ppc_vsx_xvcmpgedp_p:
7756   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
7757   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
7758   case Intrinsic::ppc_vsx_xvcmpgesp_p:
7759   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
7760     if (Subtarget.hasVSX()) {
7761       switch (IntrinsicID) {
7762       case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
7763       case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
7764       case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
7765       case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
7766       case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
7767       case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
7768       }
7769       isDot = 1;
7770     }
7771     else
7772       return false;
7773 
7774     break;
7775 
7776     // Normal Comparisons.
7777   case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
7778   case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
7779   case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
7780   case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
7781   case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
7782   case Intrinsic::ppc_altivec_vcmpequd:
7783     if (Subtarget.hasP8Altivec()) {
7784       CompareOpc = 199;
7785       isDot = 0;
7786     } else
7787       return false;
7788 
7789     break;
7790   case Intrinsic::ppc_altivec_vcmpneb:
7791   case Intrinsic::ppc_altivec_vcmpneh:
7792   case Intrinsic::ppc_altivec_vcmpnew:
7793   case Intrinsic::ppc_altivec_vcmpnezb:
7794   case Intrinsic::ppc_altivec_vcmpnezh:
7795   case Intrinsic::ppc_altivec_vcmpnezw:
7796     if (Subtarget.hasP9Altivec()) {
7797       switch (IntrinsicID) {
7798       default: llvm_unreachable("Unknown comparison intrinsic.");
7799       case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
7800       case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
7801       case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
7802       case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
7803       case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
7804       case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
7805       }
7806       isDot = 0;
7807     } else
7808       return false;
7809     break;
7810   case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
7811   case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
7812   case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
7813   case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
7814   case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
7815   case Intrinsic::ppc_altivec_vcmpgtsd:
7816     if (Subtarget.hasP8Altivec()) {
7817       CompareOpc = 967;
7818       isDot = 0;
7819     } else
7820       return false;
7821 
7822     break;
7823   case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
7824   case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
7825   case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
7826   case Intrinsic::ppc_altivec_vcmpgtud:
7827     if (Subtarget.hasP8Altivec()) {
7828       CompareOpc = 711;
7829       isDot = 0;
7830     } else
7831       return false;
7832 
7833     break;
7834   }
7835   return true;
7836 }
7837 
7838 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
7839 /// lower, do it, otherwise return null.
7840 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
7841                                                    SelectionDAG &DAG) const {
7842   unsigned IntrinsicID =
7843     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7844 
7845   if (IntrinsicID == Intrinsic::thread_pointer) {
7846     // Reads the thread pointer register, used for __builtin_thread_pointer.
7847     bool is64bit = Subtarget.isPPC64();
7848     return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
7849                            is64bit ? MVT::i64 : MVT::i32);
7850   }
7851 
7852   // If this is a lowered altivec predicate compare, CompareOpc is set to the
7853   // opcode number of the comparison.
7854   SDLoc dl(Op);
7855   int CompareOpc;
7856   bool isDot;
7857   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
7858     return SDValue();    // Don't custom lower most intrinsics.
7859 
7860   // If this is a non-dot comparison, make the VCMP node and we are done.
7861   if (!isDot) {
7862     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
7863                               Op.getOperand(1), Op.getOperand(2),
7864                               DAG.getConstant(CompareOpc, dl, MVT::i32));
7865     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
7866   }
7867 
7868   // Create the PPCISD altivec 'dot' comparison node.
7869   SDValue Ops[] = {
7870     Op.getOperand(2),  // LHS
7871     Op.getOperand(3),  // RHS
7872     DAG.getConstant(CompareOpc, dl, MVT::i32)
7873   };
7874   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
7875   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
7876 
7877   // Now that we have the comparison, emit a copy from the CR to a GPR.
7878   // This is flagged to the above dot comparison.
7879   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
7880                                 DAG.getRegister(PPC::CR6, MVT::i32),
7881                                 CompNode.getValue(1));
7882 
7883   // Unpack the result based on how the target uses it.
7884   unsigned BitNo;   // Bit # of CR6.
7885   bool InvertBit;   // Invert result?
7886   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
7887   default:  // Can't happen, don't crash on invalid number though.
7888   case 0:   // Return the value of the EQ bit of CR6.
7889     BitNo = 0; InvertBit = false;
7890     break;
7891   case 1:   // Return the inverted value of the EQ bit of CR6.
7892     BitNo = 0; InvertBit = true;
7893     break;
7894   case 2:   // Return the value of the LT bit of CR6.
7895     BitNo = 2; InvertBit = false;
7896     break;
7897   case 3:   // Return the inverted value of the LT bit of CR6.
7898     BitNo = 2; InvertBit = true;
7899     break;
7900   }
7901 
7902   // Shift the bit into the low position.
7903   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
7904                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
7905   // Isolate the bit.
7906   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
7907                       DAG.getConstant(1, dl, MVT::i32));
7908 
7909   // If we are supposed to, toggle the bit.
7910   if (InvertBit)
7911     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
7912                         DAG.getConstant(1, dl, MVT::i32));
7913   return Flags;
7914 }
7915 
7916 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
7917                                                   SelectionDAG &DAG) const {
7918   SDLoc dl(Op);
  // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
  // instructions), but for smaller types, we need to first extend up to v2i32
  // before going any further.
7922   if (Op.getValueType() == MVT::v2i64) {
7923     EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
7924     if (ExtVT != MVT::v2i32) {
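      // Extend in two steps: first sign-extend the narrow element within each
      // 32-bit lane, then sign-extend the 32-bit lanes to 64 bits.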
7925       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
7926       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
7927                        DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
7928                                         ExtVT.getVectorElementType(), 4)));
7929       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
7930       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
7931                        DAG.getValueType(MVT::v2i32));
7932     }
7933 
7934     return Op;
7935   }
7936 
7937   return SDValue();
7938 }
7939 
7940 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
7941                                                    SelectionDAG &DAG) const {
7942   SDLoc dl(Op);
7943   // Create a stack slot that is 16-byte aligned.
7944   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7945   int FrameIdx = MFI.CreateStackObject(16, 16, false);
7946   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7947   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7948 
7949   // Store the input value into Value#0 of the stack slot.
7950   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7951                                MachinePointerInfo());
7952   // Load it out.
7953   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
7954 }
7955 
7956 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
7957                                                   SelectionDAG &DAG) const {
7958   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
7959          "Should only be called for ISD::INSERT_VECTOR_ELT");
7960   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
7961   // We have legal lowering for constant indices but not for variable ones.
7962   if (C)
7963     return Op;
7964   return SDValue();
7965 }
7966 
7967 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
7968                                                    SelectionDAG &DAG) const {
7969   SDLoc dl(Op);
7970   SDNode *N = Op.getNode();
7971 
7972   assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
7973          "Unknown extract_vector_elt type");
7974 
7975   SDValue Value = N->getOperand(0);
7976 
7977   // The first part of this is like the store lowering except that we don't
7978   // need to track the chain.
7979 
7980   // The values are now known to be -1 (false) or 1 (true). To convert this
7981   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7982   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7983   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7984 
7985   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
7986   // understand how to form the extending load.
7987   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
7988 
7989   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7990 
7991   // Now convert to an integer and store.
7992   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7993     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
7994     Value);
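  // Each element is now an unsigned 32-bit integer value carried in a v4f64
  // register; spill it to a stack slot so the requested word can be reloaded
  // as an i32.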
7995 
7996   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7997   int FrameIdx = MFI.CreateStackObject(16, 16, false);
7998   MachinePointerInfo PtrInfo =
7999       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8000   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8001   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8002 
8003   SDValue StoreChain = DAG.getEntryNode();
8004   SDValue Ops[] = {StoreChain,
8005                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8006                    Value, FIdx};
8007   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8008 
8009   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8010     dl, VTs, Ops, MVT::v4i32, PtrInfo);
8011 
8012   // Extract the value requested.
8013   unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
8014   SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8015   Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8016 
8017   SDValue IntVal =
8018       DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
8019 
8020   if (!Subtarget.useCRBits())
8021     return IntVal;
8022 
8023   return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
8024 }
8025 
8026 /// Lowering for QPX v4i1 loads
8027 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
8028                                            SelectionDAG &DAG) const {
8029   SDLoc dl(Op);
8030   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
8031   SDValue LoadChain = LN->getChain();
8032   SDValue BasePtr = LN->getBasePtr();
8033 
8034   if (Op.getValueType() == MVT::v4f64 ||
8035       Op.getValueType() == MVT::v4f32) {
8036     EVT MemVT = LN->getMemoryVT();
8037     unsigned Alignment = LN->getAlignment();
8038 
8039     // If this load is properly aligned, then it is legal.
8040     if (Alignment >= MemVT.getStoreSize())
8041       return Op;
8042 
8043     EVT ScalarVT = Op.getValueType().getScalarType(),
8044         ScalarMemVT = MemVT.getScalarType();
8045     unsigned Stride = ScalarMemVT.getStoreSize();
8046 
8047     SDValue Vals[4], LoadChains[4];
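    // Split the under-aligned vector load into four scalar element loads
    // (extending loads when the in-memory element type is narrower), then
    // rebuild the vector and merge the load chains with a TokenFactor.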
8048     for (unsigned Idx = 0; Idx < 4; ++Idx) {
8049       SDValue Load;
8050       if (ScalarVT != ScalarMemVT)
8051         Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
8052                               BasePtr,
8053                               LN->getPointerInfo().getWithOffset(Idx * Stride),
8054                               ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8055                               LN->getMemOperand()->getFlags(), LN->getAAInfo());
8056       else
8057         Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
8058                            LN->getPointerInfo().getWithOffset(Idx * Stride),
8059                            MinAlign(Alignment, Idx * Stride),
8060                            LN->getMemOperand()->getFlags(), LN->getAAInfo());
8061 
8062       if (Idx == 0 && LN->isIndexed()) {
8063         assert(LN->getAddressingMode() == ISD::PRE_INC &&
8064                "Unknown addressing mode on vector load");
8065         Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
8066                                   LN->getAddressingMode());
8067       }
8068 
8069       Vals[Idx] = Load;
8070       LoadChains[Idx] = Load.getValue(1);
8071 
8072       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8073                             DAG.getConstant(Stride, dl,
8074                                             BasePtr.getValueType()));
8075     }
8076 
8077     SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8078     SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
8079 
8080     if (LN->isIndexed()) {
8081       SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
8082       return DAG.getMergeValues(RetOps, dl);
8083     }
8084 
8085     SDValue RetOps[] = { Value, TF };
8086     return DAG.getMergeValues(RetOps, dl);
8087   }
8088 
8089   assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
8090   assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
8091 
8092   // To lower v4i1 from a byte array, we load the byte elements of the
8093   // vector and then reuse the BUILD_VECTOR logic.
8094 
8095   SDValue VectElmts[4], VectElmtChains[4];
8096   for (unsigned i = 0; i < 4; ++i) {
8097     SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8098     Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8099 
8100     VectElmts[i] = DAG.getExtLoad(
8101         ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
8102         LN->getPointerInfo().getWithOffset(i), MVT::i8,
8103         /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
8104     VectElmtChains[i] = VectElmts[i].getValue(1);
8105   }
8106 
8107   LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
8108   SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
8109 
8110   SDValue RVals[] = { Value, LoadChain };
8111   return DAG.getMergeValues(RVals, dl);
8112 }
8113 
8114 /// Lowering for QPX v4i1 stores
8115 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
8116                                             SelectionDAG &DAG) const {
8117   SDLoc dl(Op);
8118   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
8119   SDValue StoreChain = SN->getChain();
8120   SDValue BasePtr = SN->getBasePtr();
8121   SDValue Value = SN->getValue();
8122 
8123   if (Value.getValueType() == MVT::v4f64 ||
8124       Value.getValueType() == MVT::v4f32) {
8125     EVT MemVT = SN->getMemoryVT();
8126     unsigned Alignment = SN->getAlignment();
8127 
8128     // If this store is properly aligned, then it is legal.
8129     if (Alignment >= MemVT.getStoreSize())
8130       return Op;
8131 
8132     EVT ScalarVT = Value.getValueType().getScalarType(),
8133         ScalarMemVT = MemVT.getScalarType();
8134     unsigned Stride = ScalarMemVT.getStoreSize();
8135 
8136     SDValue Stores[4];
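    // Likewise, split the under-aligned vector store into four scalar element
    // stores, truncating when the in-memory element type is narrower.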
8137     for (unsigned Idx = 0; Idx < 4; ++Idx) {
8138       SDValue Ex = DAG.getNode(
8139           ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
8140           DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
8141       SDValue Store;
8142       if (ScalarVT != ScalarMemVT)
8143         Store =
8144             DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
8145                               SN->getPointerInfo().getWithOffset(Idx * Stride),
8146                               ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8147                               SN->getMemOperand()->getFlags(), SN->getAAInfo());
8148       else
8149         Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
8150                              SN->getPointerInfo().getWithOffset(Idx * Stride),
8151                              MinAlign(Alignment, Idx * Stride),
8152                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
8153 
8154       if (Idx == 0 && SN->isIndexed()) {
8155         assert(SN->getAddressingMode() == ISD::PRE_INC &&
8156                "Unknown addressing mode on vector store");
8157         Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
8158                                     SN->getAddressingMode());
8159       }
8160 
8161       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8162                             DAG.getConstant(Stride, dl,
8163                                             BasePtr.getValueType()));
8164       Stores[Idx] = Store;
8165     }
8166 
8167     SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8168 
8169     if (SN->isIndexed()) {
8170       SDValue RetOps[] = { TF, Stores[0].getValue(1) };
8171       return DAG.getMergeValues(RetOps, dl);
8172     }
8173 
8174     return TF;
8175   }
8176 
8177   assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
8178   assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
8179 
8180   // The values are now known to be -1 (false) or 1 (true). To convert this
8181   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
8182   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
8183   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
8184 
8185   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
8186   // understand how to form the extending load.
8187   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
8188 
8189   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
8190 
8191   // Now convert to an integer and store.
8192   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8193     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
8194     Value);
8195 
8196   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8197   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8198   MachinePointerInfo PtrInfo =
8199       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8200   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8201   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8202 
8203   SDValue Ops[] = {StoreChain,
8204                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8205                    Value, FIdx};
8206   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8207 
8208   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8209     dl, VTs, Ops, MVT::v4i32, PtrInfo);
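  // The qvstfiw intrinsic has now written the four converted words into the
  // stack slot.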
8210 
8211   // Move data into the byte array.
8212   SDValue Loads[4], LoadChains[4];
8213   for (unsigned i = 0; i < 4; ++i) {
8214     unsigned Offset = 4*i;
8215     SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8216     Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8217 
8218     Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
8219                            PtrInfo.getWithOffset(Offset));
8220     LoadChains[i] = Loads[i].getValue(1);
8221   }
8222 
8223   StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8224 
8225   SDValue Stores[4];
8226   for (unsigned i = 0; i < 4; ++i) {
8227     SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8228     Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8229 
8230     Stores[i] = DAG.getTruncStore(
8231         StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
8232         MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
8233         SN->getAAInfo());
8234   }
8235 
8236   StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8237 
8238   return StoreChain;
8239 }
8240 
8241 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
8242   SDLoc dl(Op);
8243   if (Op.getValueType() == MVT::v4i32) {
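    // Build the 32-bit products from 16-bit halves: writing each word as
    // a = aH*2^16 + aL and b = bH*2^16 + bL, we have
    // a*b (mod 2^32) = aL*bL + ((aH*bL + aL*bH) << 16).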
8244     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8245 
8246     SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
8247     SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
8248 
8249     SDValue RHSSwap =   // = vrlw RHS, 16
8250       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
8251 
8252     // Shrinkify inputs to v8i16.
8253     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
8254     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
8255     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
8256 
8257     // Low parts multiplied together, generating 32-bit results (we ignore the
8258     // top parts).
8259     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
8260                                         LHS, RHS, DAG, dl, MVT::v4i32);
8261 
8262     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
8263                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
8264     // Shift the high parts up 16 bits.
8265     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
8266                               Neg16, DAG, dl);
8267     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
8268   } else if (Op.getValueType() == MVT::v8i16) {
8269     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8270 
8271     SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
8272 
8273     return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
8274                             LHS, RHS, Zero, DAG, dl);
8275   } else if (Op.getValueType() == MVT::v16i8) {
8276     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8277     bool isLittleEndian = Subtarget.isLittleEndian();
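    // Build the result from the even and odd 8 x 8 -> 16-bit products
    // (vmuleub/vmuloub), interleaving their low-order bytes to form the
    // sixteen byte results.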
8278 
8279     // Multiply the even 8-bit parts, producing 16-bit sums.
8280     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
8281                                            LHS, RHS, DAG, dl, MVT::v8i16);
8282     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
8283 
8284     // Multiply the odd 8-bit parts, producing 16-bit sums.
8285     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
8286                                           LHS, RHS, DAG, dl, MVT::v8i16);
8287     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
8288 
8289     // Merge the results together.  Because vmuleub and vmuloub are
8290     // instructions with a big-endian bias, we must reverse the
8291     // element numbering and reverse the meaning of "odd" and "even"
8292     // when generating little endian code.
8293     int Ops[16];
8294     for (unsigned i = 0; i != 8; ++i) {
8295       if (isLittleEndian) {
8296         Ops[i*2  ] = 2*i;
8297         Ops[i*2+1] = 2*i+16;
8298       } else {
8299         Ops[i*2  ] = 2*i+1;
8300         Ops[i*2+1] = 2*i+1+16;
8301       }
8302     }
8303     if (isLittleEndian)
8304       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
8305     else
8306       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
8307   } else {
8308     llvm_unreachable("Unknown mul to lower!");
8309   }
8310 }
8311 
8312 /// LowerOperation - Provide custom lowering hooks for some operations.
8313 ///
8314 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8315   switch (Op.getOpcode()) {
8316   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
8317   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
8318   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
8319   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
8320   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
8321   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
8322   case ISD::SETCC:              return LowerSETCC(Op, DAG);
8323   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
8324   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
8325   case ISD::VASTART:
8326     return LowerVASTART(Op, DAG);
8327 
8328   case ISD::VAARG:
8329     return LowerVAARG(Op, DAG);
8330 
8331   case ISD::VACOPY:
8332     return LowerVACOPY(Op, DAG);
8333 
8334   case ISD::STACKRESTORE:
8335     return LowerSTACKRESTORE(Op, DAG);
8336 
8337   case ISD::DYNAMIC_STACKALLOC:
8338     return LowerDYNAMIC_STACKALLOC(Op, DAG);
8339 
8340   case ISD::GET_DYNAMIC_AREA_OFFSET:
8341     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
8342 
8343   case ISD::EH_DWARF_CFA:
8344     return LowerEH_DWARF_CFA(Op, DAG);
8345 
8346   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
8347   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
8348 
8349   case ISD::LOAD:               return LowerLOAD(Op, DAG);
8350   case ISD::STORE:              return LowerSTORE(Op, DAG);
8351   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
8352   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
8353   case ISD::FP_TO_UINT:
8354   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
8355                                                       SDLoc(Op));
8356   case ISD::UINT_TO_FP:
8357   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
8358   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
8359 
8360   // Lower 64-bit shifts.
8361   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
8362   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
8363   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
8364 
8365   // Vector-related lowering.
8366   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
8367   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
8368   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
8369   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
8370   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
8371   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
8372   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
8373   case ISD::MUL:                return LowerMUL(Op, DAG);
8374 
8375   // For counter-based loop handling.
8376   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
8377 
8378   // Frame & Return address.
8379   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
8380   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
8381   }
8382 }
8383 
8384 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
8385                                            SmallVectorImpl<SDValue>&Results,
8386                                            SelectionDAG &DAG) const {
8387   SDLoc dl(N);
8388   switch (N->getOpcode()) {
8389   default:
8390     llvm_unreachable("Do not know how to custom type legalize this operation!");
8391   case ISD::READCYCLECOUNTER: {
8392     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
8393     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
8394 
8395     Results.push_back(RTB);
8396     Results.push_back(RTB.getValue(1));
8397     Results.push_back(RTB.getValue(2));
8398     break;
8399   }
8400   case ISD::INTRINSIC_W_CHAIN: {
8401     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
8402         Intrinsic::ppc_is_decremented_ctr_nonzero)
8403       break;
8404 
8405     assert(N->getValueType(0) == MVT::i1 &&
8406            "Unexpected result type for CTR decrement intrinsic");
8407     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
8408                                  N->getValueType(0));
8409     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
8410     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
8411                                  N->getOperand(1));
8412 
8413     Results.push_back(NewInt);
8414     Results.push_back(NewInt.getValue(1));
8415     break;
8416   }
8417   case ISD::VAARG: {
8418     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
8419       return;
8420 
8421     EVT VT = N->getValueType(0);
8422 
8423     if (VT == MVT::i64) {
8424       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
8425 
8426       Results.push_back(NewNode);
8427       Results.push_back(NewNode.getValue(1));
8428     }
8429     return;
8430   }
8431   case ISD::FP_ROUND_INREG: {
8432     assert(N->getValueType(0) == MVT::ppcf128);
8433     assert(N->getOperand(0).getValueType() == MVT::ppcf128);
8434     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
8435                              MVT::f64, N->getOperand(0),
8436                              DAG.getIntPtrConstant(0, dl));
8437     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
8438                              MVT::f64, N->getOperand(0),
8439                              DAG.getIntPtrConstant(1, dl));
8440 
8441     // Add the two halves of the long double in round-to-zero mode.
8442     SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8443 
8444     // We know the low half is about to be thrown away, so just use something
8445     // convenient.
8446     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
8447                                 FPreg, FPreg));
8448     return;
8449   }
8450   case ISD::FP_TO_SINT:
8451   case ISD::FP_TO_UINT:
8452     // LowerFP_TO_INT() can only handle f32 and f64.
8453     if (N->getOperand(0).getValueType() == MVT::ppcf128)
8454       return;
8455     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
8456     return;
8457   }
8458 }
8459 
8460 //===----------------------------------------------------------------------===//
8461 //  Other Lowering Code
8462 //===----------------------------------------------------------------------===//
8463 
8464 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
8465   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
8466   Function *Func = Intrinsic::getDeclaration(M, Id);
8467   return Builder.CreateCall(Func, {});
8468 }
8469 
// The mappings for emitLeadingFence/emitTrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
8472 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
8473                                          AtomicOrdering Ord, bool IsStore,
8474                                          bool IsLoad) const {
8475   if (Ord == AtomicOrdering::SequentiallyConsistent)
8476     return callIntrinsic(Builder, Intrinsic::ppc_sync);
8477   if (isReleaseOrStronger(Ord))
8478     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8479   return nullptr;
8480 }
8481 
8482 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
8483                                           AtomicOrdering Ord, bool IsStore,
8484                                           bool IsLoad) const {
8485   if (IsLoad && isAcquireOrStronger(Ord))
8486     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
8487   // FIXME: this is too conservative, a dependent branch + isync is enough.
8488   // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
8489   // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
8490   // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
8491   return nullptr;
8492 }
8493 
8494 MachineBasicBlock *
8495 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
8496                                     unsigned AtomicSize,
8497                                     unsigned BinOpcode,
8498                                     unsigned CmpOpcode,
8499                                     unsigned CmpPred) const {
8500   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8501   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8502 
8503   auto LoadMnemonic = PPC::LDARX;
8504   auto StoreMnemonic = PPC::STDCX;
8505   switch (AtomicSize) {
8506   default:
8507     llvm_unreachable("Unexpected size of atomic entity");
8508   case 1:
8509     LoadMnemonic = PPC::LBARX;
8510     StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "Call this with size < 4 only if partword atomics are available");
8512     break;
8513   case 2:
8514     LoadMnemonic = PPC::LHARX;
8515     StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "Call this with size < 4 only if partword atomics are available");
8517     break;
8518   case 4:
8519     LoadMnemonic = PPC::LWARX;
8520     StoreMnemonic = PPC::STWCX;
8521     break;
8522   case 8:
8523     LoadMnemonic = PPC::LDARX;
8524     StoreMnemonic = PPC::STDCX;
8525     break;
8526   }
8527 
8528   const BasicBlock *LLVM_BB = BB->getBasicBlock();
8529   MachineFunction *F = BB->getParent();
8530   MachineFunction::iterator It = ++BB->getIterator();
8531 
8532   unsigned dest = MI.getOperand(0).getReg();
8533   unsigned ptrA = MI.getOperand(1).getReg();
8534   unsigned ptrB = MI.getOperand(2).getReg();
8535   unsigned incr = MI.getOperand(3).getReg();
8536   DebugLoc dl = MI.getDebugLoc();
8537 
8538   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8539   MachineBasicBlock *loop2MBB =
8540     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
8541   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8542   F->insert(It, loopMBB);
8543   if (CmpOpcode)
8544     F->insert(It, loop2MBB);
8545   F->insert(It, exitMBB);
8546   exitMBB->splice(exitMBB->begin(), BB,
8547                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
8548   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8549 
8550   MachineRegisterInfo &RegInfo = F->getRegInfo();
8551   unsigned TmpReg = (!BinOpcode) ? incr :
8552     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
8553                                            : &PPC::GPRCRegClass);
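  // When there is no binary opcode (atomic swap and the min/max forms), the
  // incoming value itself is stored; otherwise TmpReg receives the result of
  // the binary operation.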
8554 
8555   //  thisMBB:
8556   //   ...
8557   //   fallthrough --> loopMBB
8558   BB->addSuccessor(loopMBB);
8559 
8560   //  loopMBB:
8561   //   l[wd]arx dest, ptr
8562   //   add r0, dest, incr
8563   //   st[wd]cx. r0, ptr
8564   //   bne- loopMBB
8565   //   fallthrough --> exitMBB
8566 
8567   // For max/min...
8568   //  loopMBB:
8569   //   l[wd]arx dest, ptr
8570   //   cmpl?[wd] incr, dest
8571   //   bgt exitMBB
8572   //  loop2MBB:
8573   //   st[wd]cx. dest, ptr
8574   //   bne- loopMBB
8575   //   fallthrough --> exitMBB
8576 
8577   BB = loopMBB;
8578   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
8579     .addReg(ptrA).addReg(ptrB);
8580   if (BinOpcode)
8581     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
8582   if (CmpOpcode) {
8583     // Signed comparisons of byte or halfword values must be sign-extended.
8584     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
8585       unsigned ExtReg =  RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
8586       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
8587               ExtReg).addReg(dest);
8588       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8589         .addReg(incr).addReg(ExtReg);
8590     } else
8591       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8592         .addReg(incr).addReg(dest);
8593 
8594     BuildMI(BB, dl, TII->get(PPC::BCC))
8595       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
8596     BB->addSuccessor(loop2MBB);
8597     BB->addSuccessor(exitMBB);
8598     BB = loop2MBB;
8599   }
8600   BuildMI(BB, dl, TII->get(StoreMnemonic))
8601     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
8602   BuildMI(BB, dl, TII->get(PPC::BCC))
8603     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
8604   BB->addSuccessor(loopMBB);
8605   BB->addSuccessor(exitMBB);
8606 
8607   //  exitMBB:
8608   //   ...
8609   BB = exitMBB;
8610   return BB;
8611 }
8612 
8613 MachineBasicBlock *
8614 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
8615                                             MachineBasicBlock *BB,
8616                                             bool is8bit, // operation
8617                                             unsigned BinOpcode,
8618                                             unsigned CmpOpcode,
8619                                             unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them.
8621   if (Subtarget.hasPartwordAtomics())
8622     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
8623                             CmpOpcode, CmpPred);
8624 
8625   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
8626   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64-bit mode we have to use 64-bit registers for addresses, even though
  // lwarx/stwcx. only operate on 32-bit words.  With word-sized atomics we can
  // use the address registers without caring whether they are 32 or 64 bits
  // wide, but here we are doing actual arithmetic on the addresses.
8631   bool is64bit = Subtarget.isPPC64();
8632   bool isLittleEndian = Subtarget.isLittleEndian();
8633   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
8634 
8635   const BasicBlock *LLVM_BB = BB->getBasicBlock();
8636   MachineFunction *F = BB->getParent();
8637   MachineFunction::iterator It = ++BB->getIterator();
8638 
8639   unsigned dest = MI.getOperand(0).getReg();
8640   unsigned ptrA = MI.getOperand(1).getReg();
8641   unsigned ptrB = MI.getOperand(2).getReg();
8642   unsigned incr = MI.getOperand(3).getReg();
8643   DebugLoc dl = MI.getDebugLoc();
8644 
8645   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
8646   MachineBasicBlock *loop2MBB =
8647     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
8648   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8649   F->insert(It, loopMBB);
8650   if (CmpOpcode)
8651     F->insert(It, loop2MBB);
8652   F->insert(It, exitMBB);
8653   exitMBB->splice(exitMBB->begin(), BB,
8654                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
8655   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8656 
8657   MachineRegisterInfo &RegInfo = F->getRegInfo();
8658   const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
8659                                           : &PPC::GPRCRegClass;
8660   unsigned PtrReg = RegInfo.createVirtualRegister(RC);
8661   unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
8662   unsigned ShiftReg =
8663     isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
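  // On little-endian targets the byte offset within the word is already the
  // shift amount, so the xori adjustment emitted below is skipped and
  // ShiftReg simply aliases Shift1Reg.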
8664   unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
8665   unsigned MaskReg = RegInfo.createVirtualRegister(RC);
8666   unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
8667   unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
8668   unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
8669   unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
8670   unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
8671   unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
8672   unsigned Ptr1Reg;
8673   unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
8674 
8675   //  thisMBB:
8676   //   ...
8677   //   fallthrough --> loopMBB
8678   BB->addSuccessor(loopMBB);
8679 
8680   // The 4-byte load must be aligned, while a char or short may be
8681   // anywhere in the word.  Hence all this nasty bookkeeping code.
8682   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
8683   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
8684   //   xori shift, shift1, 24 [16]
8685   //   rlwinm ptr, ptr1, 0, 0, 29
8686   //   slw incr2, incr, shift
8687   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
8688   //   slw mask, mask2, shift
8689   //  loopMBB:
8690   //   lwarx tmpDest, ptr
8691   //   add tmp, tmpDest, incr2
8692   //   andc tmp2, tmpDest, mask
8693   //   and tmp3, tmp, mask
8694   //   or tmp4, tmp3, tmp2
8695   //   stwcx. tmp4, ptr
8696   //   bne- loopMBB
8697   //   fallthrough --> exitMBB
8698   //   srw dest, tmpDest, shift
8699   if (ptrA != ZeroReg) {
8700     Ptr1Reg = RegInfo.createVirtualRegister(RC);
8701     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
8702       .addReg(ptrA).addReg(ptrB);
8703   } else {
8704     Ptr1Reg = ptrB;
8705   }
8706   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
8707       .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
8708   if (!isLittleEndian)
8709     BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
8710         .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
8711   if (is64bit)
8712     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
8713       .addReg(Ptr1Reg).addImm(0).addImm(61);
8714   else
8715     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
8716       .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
8717   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
8718       .addReg(incr).addReg(ShiftReg);
8719   if (is8bit)
8720     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
8721   else {
8722     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
8723     BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
8724   }
8725   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
8726       .addReg(Mask2Reg).addReg(ShiftReg);
8727 
8728   BB = loopMBB;
8729   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
8730     .addReg(ZeroReg).addReg(PtrReg);
8731   if (BinOpcode)
8732     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
8733       .addReg(Incr2Reg).addReg(TmpDestReg);
8734   BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
8735     .addReg(TmpDestReg).addReg(MaskReg);
8736   BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
8737     .addReg(TmpReg).addReg(MaskReg);
8738   if (CmpOpcode) {
8739     // For unsigned comparisons, we can directly compare the shifted values.
8740     // For signed comparisons we shift and sign extend.
8741     unsigned SReg = RegInfo.createVirtualRegister(RC);
8742     BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
8743       .addReg(TmpDestReg).addReg(MaskReg);
8744     unsigned ValueReg = SReg;
8745     unsigned CmpReg = Incr2Reg;
8746     if (CmpOpcode == PPC::CMPW) {
8747       ValueReg = RegInfo.createVirtualRegister(RC);
8748       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
8749         .addReg(SReg).addReg(ShiftReg);
8750       unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
8751       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
8752         .addReg(ValueReg);
8753       ValueReg = ValueSReg;
8754       CmpReg = incr;
8755     }
8756     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8757       .addReg(CmpReg).addReg(ValueReg);
8758     BuildMI(BB, dl, TII->get(PPC::BCC))
8759       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
8760     BB->addSuccessor(loop2MBB);
8761     BB->addSuccessor(exitMBB);
8762     BB = loop2MBB;
8763   }
8764   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
8765     .addReg(Tmp3Reg).addReg(Tmp2Reg);
8766   BuildMI(BB, dl, TII->get(PPC::STWCX))
8767     .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
8768   BuildMI(BB, dl, TII->get(PPC::BCC))
8769     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
8770   BB->addSuccessor(loopMBB);
8771   BB->addSuccessor(exitMBB);
8772 
8773   //  exitMBB:
8774   //   ...
8775   BB = exitMBB;
8776   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
8777     .addReg(ShiftReg);
8778   return BB;
8779 }
8780 
8781 llvm::MachineBasicBlock *
8782 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
8783                                     MachineBasicBlock *MBB) const {
8784   DebugLoc DL = MI.getDebugLoc();
8785   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8786 
8787   MachineFunction *MF = MBB->getParent();
8788   MachineRegisterInfo &MRI = MF->getRegInfo();
8789 
8790   const BasicBlock *BB = MBB->getBasicBlock();
8791   MachineFunction::iterator I = ++MBB->getIterator();
8792 
8793   // Memory Reference
8794   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
8795   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
8796 
8797   unsigned DstReg = MI.getOperand(0).getReg();
8798   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
8799   assert(RC->hasType(MVT::i32) && "Invalid destination!");
8800   unsigned mainDstReg = MRI.createVirtualRegister(RC);
8801   unsigned restoreDstReg = MRI.createVirtualRegister(RC);
8802 
8803   MVT PVT = getPointerTy(MF->getDataLayout());
8804   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
8805          "Invalid Pointer Size!");
8806   // For v = setjmp(buf), we generate
8807   //
8808   // thisMBB:
8809   //  SjLjSetup mainMBB
8810   //  bl mainMBB
8811   //  v_restore = 1
8812   //  b sinkMBB
8813   //
8814   // mainMBB:
8815   //  buf[LabelOffset] = LR
8816   //  v_main = 0
8817   //
8818   // sinkMBB:
8819   //  v = phi(main, restore)
8820   //
8821 
8822   MachineBasicBlock *thisMBB = MBB;
8823   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
8824   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
8825   MF->insert(I, mainMBB);
8826   MF->insert(I, sinkMBB);
8827 
8828   MachineInstrBuilder MIB;
8829 
8830   // Transfer the remainder of BB and its successor edges to sinkMBB.
8831   sinkMBB->splice(sinkMBB->begin(), MBB,
8832                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
8833   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8834 
8835   // Note that the structure of the jmp_buf used here is not compatible
8836   // with that used by libc, and is not designed to be. Specifically, it
8837   // stores only those 'reserved' registers that LLVM does not otherwise
8838   // understand how to spill. Also, by convention, by the time this
8839   // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and the stack address in the third. Following the
8841   // X86 target code, we'll store the jump address in the second slot. We also
8842   // need to save the TOC pointer (R2) to handle jumps between shared
8843   // libraries, and that will be stored in the fourth slot. The thread
8844   // identifier (R13) is not affected.
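  // Summarizing the slot layout used by this code (offsets in units of the
  // pointer size): slot 0 holds the frame address and slot 2 the stack address
  // (both stored by Clang), slot 1 the jump address (LabelOffset), slot 3 the
  // TOC pointer (TOCOffset), and slot 4 the base pointer (BPOffset).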
8845 
8846   // thisMBB:
8847   const int64_t LabelOffset = 1 * PVT.getStoreSize();
8848   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
8849   const int64_t BPOffset    = 4 * PVT.getStoreSize();
8850 
  // Prepare the IP in a register; it is captured via MFLR in mainMBB below.
8852   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
8853   unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
8854   unsigned BufReg = MI.getOperand(1).getReg();
8855 
8856   if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
8857     setUsesTOCBasePtr(*MBB->getParent());
8858     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
8859             .addReg(PPC::X2)
8860             .addImm(TOCOffset)
8861             .addReg(BufReg);
8862     MIB.setMemRefs(MMOBegin, MMOEnd);
8863   }
8864 
8865   // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision is deferred until PEI.
8867   unsigned BaseReg;
8868   if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
8869     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
8870   else
8871     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
8872 
8873   MIB = BuildMI(*thisMBB, MI, DL,
8874                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
8875             .addReg(BaseReg)
8876             .addImm(BPOffset)
8877             .addReg(BufReg);
8878   MIB.setMemRefs(MMOBegin, MMOEnd);
8879 
8880   // Setup
8881   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
8882   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
8883   MIB.addRegMask(TRI->getNoPreservedMask());
8884 
8885   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
8886 
8887   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
8888           .addMBB(mainMBB);
8889   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
8890 
8891   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
8892   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
8893 
8894   // mainMBB:
8895   //  mainDstReg = 0
8896   MIB =
8897       BuildMI(mainMBB, DL,
8898               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
8899 
8900   // Store IP
8901   if (Subtarget.isPPC64()) {
8902     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
8903             .addReg(LabelReg)
8904             .addImm(LabelOffset)
8905             .addReg(BufReg);
8906   } else {
8907     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
8908             .addReg(LabelReg)
8909             .addImm(LabelOffset)
8910             .addReg(BufReg);
8911   }
8912 
8913   MIB.setMemRefs(MMOBegin, MMOEnd);
8914 
8915   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
8916   mainMBB->addSuccessor(sinkMBB);
8917 
8918   // sinkMBB:
8919   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
8920           TII->get(PPC::PHI), DstReg)
8921     .addReg(mainDstReg).addMBB(mainMBB)
8922     .addReg(restoreDstReg).addMBB(thisMBB);
8923 
8924   MI.eraseFromParent();
8925   return sinkMBB;
8926 }
8927 
8928 MachineBasicBlock *
8929 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
8930                                      MachineBasicBlock *MBB) const {
8931   DebugLoc DL = MI.getDebugLoc();
8932   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8933 
8934   MachineFunction *MF = MBB->getParent();
8935   MachineRegisterInfo &MRI = MF->getRegInfo();
8936 
8937   // Memory Reference
8938   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
8939   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
8940 
8941   MVT PVT = getPointerTy(MF->getDataLayout());
8942   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
8943          "Invalid Pointer Size!");
8944 
8945   const TargetRegisterClass *RC =
8946     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
8947   unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as a GPR.
8949   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
8950   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
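  // Note that on 32-bit SVR4 position-independent code r30 serves as the PIC
  // base register, so the base pointer falls back to r29 there.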
8951   unsigned BP =
8952       (PVT == MVT::i64)
8953           ? PPC::X30
8954           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
8955                                                               : PPC::R30);
8956 
8957   MachineInstrBuilder MIB;
8958 
8959   const int64_t LabelOffset = 1 * PVT.getStoreSize();
8960   const int64_t SPOffset    = 2 * PVT.getStoreSize();
8961   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
8962   const int64_t BPOffset    = 4 * PVT.getStoreSize();
8963 
8964   unsigned BufReg = MI.getOperand(0).getReg();
8965 
8966   // Reload FP (the jumped-to function may not have had a
8967   // frame pointer, and if so, then its r31 will be restored
8968   // as necessary).
8969   if (PVT == MVT::i64) {
8970     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
8971             .addImm(0)
8972             .addReg(BufReg);
8973   } else {
8974     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
8975             .addImm(0)
8976             .addReg(BufReg);
8977   }
8978   MIB.setMemRefs(MMOBegin, MMOEnd);
8979 
8980   // Reload IP
8981   if (PVT == MVT::i64) {
8982     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
8983             .addImm(LabelOffset)
8984             .addReg(BufReg);
8985   } else {
8986     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
8987             .addImm(LabelOffset)
8988             .addReg(BufReg);
8989   }
8990   MIB.setMemRefs(MMOBegin, MMOEnd);
8991 
8992   // Reload SP
8993   if (PVT == MVT::i64) {
8994     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
8995             .addImm(SPOffset)
8996             .addReg(BufReg);
8997   } else {
8998     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
8999             .addImm(SPOffset)
9000             .addReg(BufReg);
9001   }
9002   MIB.setMemRefs(MMOBegin, MMOEnd);
9003 
9004   // Reload BP
9005   if (PVT == MVT::i64) {
9006     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
9007             .addImm(BPOffset)
9008             .addReg(BufReg);
9009   } else {
9010     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
9011             .addImm(BPOffset)
9012             .addReg(BufReg);
9013   }
9014   MIB.setMemRefs(MMOBegin, MMOEnd);
9015 
9016   // Reload TOC
9017   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
9018     setUsesTOCBasePtr(*MBB->getParent());
9019     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
9020             .addImm(TOCOffset)
9021             .addReg(BufReg);
9022 
9023     MIB.setMemRefs(MMOBegin, MMOEnd);
9024   }
9025 
9026   // Jump
9027   BuildMI(*MBB, MI, DL,
9028           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
9029   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
9030 
9031   MI.eraseFromParent();
9032   return MBB;
9033 }
9034 
9035 MachineBasicBlock *
9036 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9037                                                MachineBasicBlock *BB) const {
9038   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
9039       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9040     if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
9041         MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9042       // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't, however, because there is no
9044       // way to mark the dependence as implicit there, and so the stackmap code
9045       // will confuse it with a regular operand. Instead, add the dependence
9046       // here.
9047       setUsesTOCBasePtr(*BB->getParent());
9048       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
9049     }
9050 
9051     return emitPatchPoint(MI, BB);
9052   }
9053 
9054   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
9055       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
9056     return emitEHSjLjSetJmp(MI, BB);
9057   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
9058              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
9059     return emitEHSjLjLongJmp(MI, BB);
9060   }
9061 
9062   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9063 
9064   // To "insert" these instructions we actually have to insert their
9065   // control-flow patterns.
9066   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9067   MachineFunction::iterator It = ++BB->getIterator();
9068 
9069   MachineFunction *F = BB->getParent();
9070 
9071   if (Subtarget.hasISEL() &&
9072       (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9073        MI.getOpcode() == PPC::SELECT_CC_I8 ||
9074        MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
9075     SmallVector<MachineOperand, 2> Cond;
9076     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9077         MI.getOpcode() == PPC::SELECT_CC_I8)
9078       Cond.push_back(MI.getOperand(4));
9079     else
9080       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
9081     Cond.push_back(MI.getOperand(1));
9082 
9083     DebugLoc dl = MI.getDebugLoc();
9084     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
9085                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
9086   } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9087              MI.getOpcode() == PPC::SELECT_CC_I8 ||
9088              MI.getOpcode() == PPC::SELECT_CC_F4 ||
9089              MI.getOpcode() == PPC::SELECT_CC_F8 ||
9090              MI.getOpcode() == PPC::SELECT_CC_QFRC ||
9091              MI.getOpcode() == PPC::SELECT_CC_QSRC ||
9092              MI.getOpcode() == PPC::SELECT_CC_QBRC ||
9093              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
9094              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
9095              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
9096              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
9097              MI.getOpcode() == PPC::SELECT_I4 ||
9098              MI.getOpcode() == PPC::SELECT_I8 ||
9099              MI.getOpcode() == PPC::SELECT_F4 ||
9100              MI.getOpcode() == PPC::SELECT_F8 ||
9101              MI.getOpcode() == PPC::SELECT_QFRC ||
9102              MI.getOpcode() == PPC::SELECT_QSRC ||
9103              MI.getOpcode() == PPC::SELECT_QBRC ||
9104              MI.getOpcode() == PPC::SELECT_VRRC ||
9105              MI.getOpcode() == PPC::SELECT_VSFRC ||
9106              MI.getOpcode() == PPC::SELECT_VSSRC ||
9107              MI.getOpcode() == PPC::SELECT_VSRC) {
9108     // The incoming instruction knows the destination vreg to set, the
9109     // condition code register to branch on, the true/false values to
9110     // select between, and a branch opcode to use.
9111 
9112     //  thisMBB:
9113     //  ...
9114     //   TrueVal = ...
9115     //   cmpTY ccX, r1, r2
9116     //   bCC copy1MBB
9117     //   fallthrough --> copy0MBB
9118     MachineBasicBlock *thisMBB = BB;
9119     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9120     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9121     DebugLoc dl = MI.getDebugLoc();
9122     F->insert(It, copy0MBB);
9123     F->insert(It, sinkMBB);
9124 
9125     // Transfer the remainder of BB and its successor edges to sinkMBB.
9126     sinkMBB->splice(sinkMBB->begin(), BB,
9127                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9128     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9129 
9130     // Next, add the true and fallthrough blocks as its successors.
9131     BB->addSuccessor(copy0MBB);
9132     BB->addSuccessor(sinkMBB);
9133 
9134     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
9135         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
9136         MI.getOpcode() == PPC::SELECT_QFRC ||
9137         MI.getOpcode() == PPC::SELECT_QSRC ||
9138         MI.getOpcode() == PPC::SELECT_QBRC ||
9139         MI.getOpcode() == PPC::SELECT_VRRC ||
9140         MI.getOpcode() == PPC::SELECT_VSFRC ||
9141         MI.getOpcode() == PPC::SELECT_VSSRC ||
9142         MI.getOpcode() == PPC::SELECT_VSRC) {
9143       BuildMI(BB, dl, TII->get(PPC::BC))
9144           .addReg(MI.getOperand(1).getReg())
9145           .addMBB(sinkMBB);
9146     } else {
9147       unsigned SelectPred = MI.getOperand(4).getImm();
9148       BuildMI(BB, dl, TII->get(PPC::BCC))
9149           .addImm(SelectPred)
9150           .addReg(MI.getOperand(1).getReg())
9151           .addMBB(sinkMBB);
9152     }
9153 
9154     //  copy0MBB:
9155     //   %FalseValue = ...
9156     //   # fallthrough to sinkMBB
9157     BB = copy0MBB;
9158 
9159     // Update machine-CFG edges
9160     BB->addSuccessor(sinkMBB);
9161 
9162     //  sinkMBB:
9163     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9164     //  ...
9165     BB = sinkMBB;
9166     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
9167         .addReg(MI.getOperand(3).getReg())
9168         .addMBB(copy0MBB)
9169         .addReg(MI.getOperand(2).getReg())
9170         .addMBB(thisMBB);
9171   } else if (MI.getOpcode() == PPC::ReadTB) {
9172     // To read the 64-bit time-base register on a 32-bit target, we read the
9173     // two halves. Should the counter have wrapped while it was being read, we
9174     // need to try again.
9175     // ...
9176     // readLoop:
9177     // mfspr Rx,TBU # load from TBU
9178     // mfspr Ry,TB  # load from TB
9179     // mfspr Rz,TBU # load from TBU
9180     // cmpw crX,Rx,Rz # check if 'old'='new'
9181     // bne readLoop   # branch if they're not equal
9182     // ...
9183 
9184     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
9185     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9186     DebugLoc dl = MI.getDebugLoc();
9187     F->insert(It, readMBB);
9188     F->insert(It, sinkMBB);
9189 
9190     // Transfer the remainder of BB and its successor edges to sinkMBB.
9191     sinkMBB->splice(sinkMBB->begin(), BB,
9192                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9193     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9194 
9195     BB->addSuccessor(readMBB);
9196     BB = readMBB;
9197 
9198     MachineRegisterInfo &RegInfo = F->getRegInfo();
9199     unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9200     unsigned LoReg = MI.getOperand(0).getReg();
9201     unsigned HiReg = MI.getOperand(1).getReg();
9202 
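    // SPR 269 is TBU (the upper half of the time base) and SPR 268 is TB/TBL
    // (the lower half).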
9203     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
9204     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
9205     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
9206 
9207     unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9208 
9209     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
9210       .addReg(HiReg).addReg(ReadAgainReg);
9211     BuildMI(BB, dl, TII->get(PPC::BCC))
9212       .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
9213 
9214     BB->addSuccessor(readMBB);
9215     BB->addSuccessor(sinkMBB);
9216   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9217     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9218   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9219     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9220   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9221     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9222   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9223     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9224 
9225   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9226     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9227   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9228     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9229   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9230     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9231   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9232     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9233 
9234   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9235     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9236   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9237     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9238   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9239     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9240   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9241     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9242 
9243   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9244     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9245   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9246     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9247   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9248     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9249   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9250     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9251 
9252   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9253     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9254   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9255     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9256   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9257     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9258   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9259     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9260 
9261   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9262     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9263   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9264     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9265   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9266     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9267   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9268     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9269 
9270   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
9271     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
9272   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
9273     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
9274   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
9275     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
9276   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
9277     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
9278 
9279   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
9280     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
9281   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
9282     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
9283   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
9284     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
9285   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
9286     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
9287 
9288   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
9289     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
9290   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
9291     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
9292   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
9293     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
9294   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
9295     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
9296 
9297   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
9298     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
9299   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
9300     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
9301   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
9302     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
9303   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
9304     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
9305 
9306   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9307     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9308   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9309     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9310   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9311     BB = EmitAtomicBinary(MI, BB, 4, 0);
9312   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9313     BB = EmitAtomicBinary(MI, BB, 8, 0);
9314 
9315   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9316            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9317            (Subtarget.hasPartwordAtomics() &&
9318             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9319            (Subtarget.hasPartwordAtomics() &&
9320             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9321     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
9322 
9323     auto LoadMnemonic = PPC::LDARX;
9324     auto StoreMnemonic = PPC::STDCX;
9325     switch (MI.getOpcode()) {
9326     default:
9327       llvm_unreachable("Compare and swap of unknown size");
9328     case PPC::ATOMIC_CMP_SWAP_I8:
9329       LoadMnemonic = PPC::LBARX;
9330       StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9332       break;
9333     case PPC::ATOMIC_CMP_SWAP_I16:
9334       LoadMnemonic = PPC::LHARX;
9335       StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9337       break;
9338     case PPC::ATOMIC_CMP_SWAP_I32:
9339       LoadMnemonic = PPC::LWARX;
9340       StoreMnemonic = PPC::STWCX;
9341       break;
9342     case PPC::ATOMIC_CMP_SWAP_I64:
9343       LoadMnemonic = PPC::LDARX;
9344       StoreMnemonic = PPC::STDCX;
9345       break;
9346     }
9347     unsigned dest = MI.getOperand(0).getReg();
9348     unsigned ptrA = MI.getOperand(1).getReg();
9349     unsigned ptrB = MI.getOperand(2).getReg();
9350     unsigned oldval = MI.getOperand(3).getReg();
9351     unsigned newval = MI.getOperand(4).getReg();
9352     DebugLoc dl = MI.getDebugLoc();
9353 
9354     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9355     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9356     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9357     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9358     F->insert(It, loop1MBB);
9359     F->insert(It, loop2MBB);
9360     F->insert(It, midMBB);
9361     F->insert(It, exitMBB);
9362     exitMBB->splice(exitMBB->begin(), BB,
9363                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9364     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9365 
9366     //  thisMBB:
9367     //   ...
9368     //   fallthrough --> loopMBB
9369     BB->addSuccessor(loop1MBB);
9370 
9371     // loop1MBB:
9372     //   l[bhwd]arx dest, ptr
9373     //   cmp[wd] dest, oldval
9374     //   bne- midMBB
9375     // loop2MBB:
9376     //   st[bhwd]cx. newval, ptr
9377     //   bne- loopMBB
9378     //   b exitBB
9379     // midMBB:
9380     //   st[bhwd]cx. dest, ptr
9381     // exitBB:
9382     BB = loop1MBB;
9383     BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9384       .addReg(ptrA).addReg(ptrB);
9385     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
9386       .addReg(oldval).addReg(dest);
9387     BuildMI(BB, dl, TII->get(PPC::BCC))
9388       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9389     BB->addSuccessor(loop2MBB);
9390     BB->addSuccessor(midMBB);
9391 
9392     BB = loop2MBB;
9393     BuildMI(BB, dl, TII->get(StoreMnemonic))
9394       .addReg(newval).addReg(ptrA).addReg(ptrB);
9395     BuildMI(BB, dl, TII->get(PPC::BCC))
9396       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9397     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9398     BB->addSuccessor(loop1MBB);
9399     BB->addSuccessor(exitMBB);
9400 
9401     BB = midMBB;
9402     BuildMI(BB, dl, TII->get(StoreMnemonic))
9403       .addReg(dest).addReg(ptrA).addReg(ptrB);
9404     BB->addSuccessor(exitMBB);
9405 
9406     //  exitMBB:
9407     //   ...
9408     BB = exitMBB;
9409   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
9410              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
9411     // We must use 64-bit registers for addresses when targeting 64-bit,
9412     // since we're actually doing arithmetic on them.  Other registers
9413     // can be 32-bit.
9414     bool is64bit = Subtarget.isPPC64();
9415     bool isLittleEndian = Subtarget.isLittleEndian();
9416     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
9417 
9418     unsigned dest = MI.getOperand(0).getReg();
9419     unsigned ptrA = MI.getOperand(1).getReg();
9420     unsigned ptrB = MI.getOperand(2).getReg();
9421     unsigned oldval = MI.getOperand(3).getReg();
9422     unsigned newval = MI.getOperand(4).getReg();
9423     DebugLoc dl = MI.getDebugLoc();
9424 
9425     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9426     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9427     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9428     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9429     F->insert(It, loop1MBB);
9430     F->insert(It, loop2MBB);
9431     F->insert(It, midMBB);
9432     F->insert(It, exitMBB);
9433     exitMBB->splice(exitMBB->begin(), BB,
9434                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9435     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9436 
9437     MachineRegisterInfo &RegInfo = F->getRegInfo();
9438     const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
9439                                             : &PPC::GPRCRegClass;
9440     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
9441     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
9442     unsigned ShiftReg =
9443       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
9444     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
9445     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
9446     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
9447     unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
9448     unsigned MaskReg = RegInfo.createVirtualRegister(RC);
9449     unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
9450     unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
9451     unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
9452     unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
9453     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
9454     unsigned Ptr1Reg;
9455     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
9456     unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
9457     //  thisMBB:
9458     //   ...
9459     //   fallthrough --> loopMBB
9460     BB->addSuccessor(loop1MBB);
9461 
9462     // The 4-byte load must be aligned, while a char or short may be
9463     // anywhere in the word.  Hence all this nasty bookkeeping code.
9464     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
9465     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
9466     //   xori shift, shift1, 24 [16]
9467     //   rlwinm ptr, ptr1, 0, 0, 29
9468     //   slw newval2, newval, shift
9469     //   slw oldval2, oldval,shift
9470     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
9471     //   slw mask, mask2, shift
9472     //   and newval3, newval2, mask
9473     //   and oldval3, oldval2, mask
9474     // loop1MBB:
9475     //   lwarx tmpDest, ptr
9476     //   and tmp, tmpDest, mask
9477     //   cmpw tmp, oldval3
9478     //   bne- midMBB
9479     // loop2MBB:
9480     //   andc tmp2, tmpDest, mask
9481     //   or tmp4, tmp2, newval3
9482     //   stwcx. tmp4, ptr
9483     //   bne- loop1MBB
9484     //   b exitBB
9485     // midMBB:
9486     //   stwcx. tmpDest, ptr
9487     // exitBB:
9488     //   srw dest, tmpDest, shift
9489     if (ptrA != ZeroReg) {
9490       Ptr1Reg = RegInfo.createVirtualRegister(RC);
9491       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
9492         .addReg(ptrA).addReg(ptrB);
9493     } else {
9494       Ptr1Reg = ptrB;
9495     }
9496     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
9497         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
9498     if (!isLittleEndian)
9499       BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
9500           .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
9501     if (is64bit)
9502       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
9503         .addReg(Ptr1Reg).addImm(0).addImm(61);
9504     else
9505       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
9506         .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
9507     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
9508         .addReg(newval).addReg(ShiftReg);
9509     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
9510         .addReg(oldval).addReg(ShiftReg);
9511     if (is8bit)
9512       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
9513     else {
9514       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
9515       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
9516         .addReg(Mask3Reg).addImm(65535);
9517     }
9518     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
9519         .addReg(Mask2Reg).addReg(ShiftReg);
9520     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
9521         .addReg(NewVal2Reg).addReg(MaskReg);
9522     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
9523         .addReg(OldVal2Reg).addReg(MaskReg);
9524 
9525     BB = loop1MBB;
9526     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
9527         .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
9529         .addReg(TmpDestReg).addReg(MaskReg);
9530     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
9531         .addReg(TmpReg).addReg(OldVal3Reg);
9532     BuildMI(BB, dl, TII->get(PPC::BCC))
9533         .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9534     BB->addSuccessor(loop2MBB);
9535     BB->addSuccessor(midMBB);
9536 
9537     BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
        .addReg(Tmp2Reg).addReg(NewVal3Reg);
9542     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
9543         .addReg(ZeroReg).addReg(PtrReg);
9544     BuildMI(BB, dl, TII->get(PPC::BCC))
9545       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9546     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9547     BB->addSuccessor(loop1MBB);
9548     BB->addSuccessor(exitMBB);
9549 
9550     BB = midMBB;
9551     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
9552       .addReg(ZeroReg).addReg(PtrReg);
9553     BB->addSuccessor(exitMBB);
9554 
9555     //  exitMBB:
9556     //   ...
9557     BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
9559       .addReg(ShiftReg);
9560   } else if (MI.getOpcode() == PPC::FADDrtz) {
9561     // This pseudo performs an FADD with rounding mode temporarily forced
9562     // to round-to-zero.  We emit this via custom inserter since the FPSCR
9563     // is not modeled at the SelectionDAG level.
9564     unsigned Dest = MI.getOperand(0).getReg();
9565     unsigned Src1 = MI.getOperand(1).getReg();
9566     unsigned Src2 = MI.getOperand(2).getReg();
9567     DebugLoc dl = MI.getDebugLoc();
9568 
9569     MachineRegisterInfo &RegInfo = F->getRegInfo();
9570     unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
9571 
9572     // Save FPSCR value.
9573     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
9574 
9575     // Set rounding mode to round-to-zero.
9576     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
9577     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
9578 
9579     // Perform addition.
9580     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
9581 
9582     // Restore FPSCR value.
9583     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
9584   } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9585              MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
9586              MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9587              MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
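    // These pseudos AND the input with 1 using the record form (ANDIo/ANDIo8,
    // which sets CR0) and then copy the requested CR0 bit (EQ or GT) into the
    // destination register.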
9588     unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9589                        MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
9590                           ? PPC::ANDIo8
9591                           : PPC::ANDIo;
9592     bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9593                  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
9594 
9595     MachineRegisterInfo &RegInfo = F->getRegInfo();
9596     unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
9597                                                   &PPC::GPRCRegClass :
9598                                                   &PPC::G8RCRegClass);
9599 
9600     DebugLoc dl = MI.getDebugLoc();
9601     BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
9602         .addReg(MI.getOperand(1).getReg())
9603         .addImm(1);
9604     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
9605             MI.getOperand(0).getReg())
9606         .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
9607   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
9608     DebugLoc Dl = MI.getDebugLoc();
9609     MachineRegisterInfo &RegInfo = F->getRegInfo();
9610     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9611     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
9612     return BB;
9613   } else {
9614     llvm_unreachable("Unexpected instr type to insert");
9615   }
9616 
9617   MI.eraseFromParent(); // The pseudo instruction is gone now.
9618   return BB;
9619 }
9620 
9621 //===----------------------------------------------------------------------===//
9622 // Target Optimization Hooks
9623 //===----------------------------------------------------------------------===//
9624 
9625 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
9626   // For the estimates, convergence is quadratic, so we essentially double the
9627   // number of digits correct after every iteration. For both FRE and FRSQRTE,
9628   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
9629   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
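  // For example, starting from 2^-5 accuracy three iterations give roughly
  // 5 -> 10 -> 20 -> 40 correct bits, which covers single precision, while a
  // 2^-14 estimate needs only one iteration (14 -> 28); double precision takes
  // one extra refinement step in either case.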
9630   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
9631   if (VT.getScalarType() == MVT::f64)
9632     RefinementSteps++;
9633   return RefinementSteps;
9634 }
9635 
9636 SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
9637                                             int Enabled, int &RefinementSteps,
9638                                             bool &UseOneConstNR) const {
9639   EVT VT = Operand.getValueType();
9640   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
9641       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
9642       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9643       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9644       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9645       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9646     if (RefinementSteps == ReciprocalEstimate::Unspecified)
9647       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9648 
9649     UseOneConstNR = true;
9650     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
9651   }
9652   return SDValue();
9653 }
9654 
9655 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
9656                                             int Enabled,
9657                                             int &RefinementSteps) const {
9658   EVT VT = Operand.getValueType();
9659   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
9660       (VT == MVT::f64 && Subtarget.hasFRE()) ||
9661       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9662       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9663       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9664       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9665     if (RefinementSteps == ReciprocalEstimate::Unspecified)
9666       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9667     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
9668   }
9669   return SDValue();
9670 }
9671 
9672 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
9673   // Note: This functionality is used only when unsafe-fp-math is enabled, and
9674   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
9675   // enabled for division), this functionality is redundant with the default
9676   // combiner logic (once the division -> reciprocal/multiply transformation
9677   // has taken place). As a result, this matters more for older cores than for
9678   // newer ones.
9679 
9680   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9681   // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
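  // In other words, once the threshold below is met, x/d and y/d are lowered
  // as t = 1.0/d; x*t; y*t.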
9683   switch (Subtarget.getDarwinDirective()) {
9684   default:
9685     return 3;
9686   case PPC::DIR_440:
9687   case PPC::DIR_A2:
9688   case PPC::DIR_E500mc:
9689   case PPC::DIR_E5500:
9690     return 2;
9691   }
9692 }
9693 
9694 // isConsecutiveLSLoc needs to work even if all adds have not yet been
9695 // collapsed, and so we need to look through chains of them.
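// For example, (add (add X, 8), 16) accumulates to Base = X and Offset = 24.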
9696 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
9697                                      int64_t& Offset, SelectionDAG &DAG) {
9698   if (DAG.isBaseWithConstantOffset(Loc)) {
9699     Base = Loc.getOperand(0);
9700     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
9701 
9702     // The base might itself be a base plus an offset, and if so, accumulate
9703     // that as well.
9704     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
9705   }
9706 }
9707 
9708 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
9709                             unsigned Bytes, int Dist,
9710                             SelectionDAG &DAG) {
9711   if (VT.getSizeInBits() / 8 != Bytes)
9712     return false;
9713 
9714   SDValue BaseLoc = Base->getBasePtr();
9715   if (Loc.getOpcode() == ISD::FrameIndex) {
9716     if (BaseLoc.getOpcode() != ISD::FrameIndex)
9717       return false;
9718     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9719     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
9720     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
9721     int FS  = MFI.getObjectSize(FI);
9722     int BFS = MFI.getObjectSize(BFI);
9723     if (FS != BFS || FS != (int)Bytes) return false;
9724     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
9725   }
9726 
9727   SDValue Base1 = Loc, Base2 = BaseLoc;
9728   int64_t Offset1 = 0, Offset2 = 0;
9729   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
9730   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
9731   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
9732     return true;
9733 
9734   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9735   const GlobalValue *GV1 = nullptr;
9736   const GlobalValue *GV2 = nullptr;
9737   Offset1 = 0;
9738   Offset2 = 0;
9739   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
9740   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
9741   if (isGA1 && isGA2 && GV1 == GV2)
9742     return Offset1 == (Offset2 + Dist*Bytes);
9743   return false;
9744 }
9745 
9746 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
9747 // not enforce equality of the chain operands.
9748 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
9749                             unsigned Bytes, int Dist,
9750                             SelectionDAG &DAG) {
9751   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
9752     EVT VT = LS->getMemoryVT();
9753     SDValue Loc = LS->getBasePtr();
9754     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
9755   }
9756 
9757   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
9758     EVT VT;
9759     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9760     default: return false;
9761     case Intrinsic::ppc_qpx_qvlfd:
9762     case Intrinsic::ppc_qpx_qvlfda:
9763       VT = MVT::v4f64;
9764       break;
9765     case Intrinsic::ppc_qpx_qvlfs:
9766     case Intrinsic::ppc_qpx_qvlfsa:
9767       VT = MVT::v4f32;
9768       break;
9769     case Intrinsic::ppc_qpx_qvlfcd:
9770     case Intrinsic::ppc_qpx_qvlfcda:
9771       VT = MVT::v2f64;
9772       break;
9773     case Intrinsic::ppc_qpx_qvlfcs:
9774     case Intrinsic::ppc_qpx_qvlfcsa:
9775       VT = MVT::v2f32;
9776       break;
9777     case Intrinsic::ppc_qpx_qvlfiwa:
9778     case Intrinsic::ppc_qpx_qvlfiwz:
9779     case Intrinsic::ppc_altivec_lvx:
9780     case Intrinsic::ppc_altivec_lvxl:
9781     case Intrinsic::ppc_vsx_lxvw4x:
9782       VT = MVT::v4i32;
9783       break;
9784     case Intrinsic::ppc_vsx_lxvd2x:
9785       VT = MVT::v2f64;
9786       break;
9787     case Intrinsic::ppc_altivec_lvebx:
9788       VT = MVT::i8;
9789       break;
9790     case Intrinsic::ppc_altivec_lvehx:
9791       VT = MVT::i16;
9792       break;
9793     case Intrinsic::ppc_altivec_lvewx:
9794       VT = MVT::i32;
9795       break;
9796     }
9797 
9798     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
9799   }
9800 
9801   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
9802     EVT VT;
9803     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9804     default: return false;
9805     case Intrinsic::ppc_qpx_qvstfd:
9806     case Intrinsic::ppc_qpx_qvstfda:
9807       VT = MVT::v4f64;
9808       break;
9809     case Intrinsic::ppc_qpx_qvstfs:
9810     case Intrinsic::ppc_qpx_qvstfsa:
9811       VT = MVT::v4f32;
9812       break;
9813     case Intrinsic::ppc_qpx_qvstfcd:
9814     case Intrinsic::ppc_qpx_qvstfcda:
9815       VT = MVT::v2f64;
9816       break;
9817     case Intrinsic::ppc_qpx_qvstfcs:
9818     case Intrinsic::ppc_qpx_qvstfcsa:
9819       VT = MVT::v2f32;
9820       break;
9821     case Intrinsic::ppc_qpx_qvstfiw:
9822     case Intrinsic::ppc_qpx_qvstfiwa:
9823     case Intrinsic::ppc_altivec_stvx:
9824     case Intrinsic::ppc_altivec_stvxl:
9825     case Intrinsic::ppc_vsx_stxvw4x:
9826       VT = MVT::v4i32;
9827       break;
9828     case Intrinsic::ppc_vsx_stxvd2x:
9829       VT = MVT::v2f64;
9830       break;
9831     case Intrinsic::ppc_altivec_stvebx:
9832       VT = MVT::i8;
9833       break;
9834     case Intrinsic::ppc_altivec_stvehx:
9835       VT = MVT::i16;
9836       break;
9837     case Intrinsic::ppc_altivec_stvewx:
9838       VT = MVT::i32;
9839       break;
9840     }
9841 
9842     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
9843   }
9844 
9845   return false;
9846 }
9847 
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
9850 // token factors and other loads (but nothing else). As a result, a true result
9851 // indicates that it is safe to create a new consecutive load adjacent to the
9852 // load provided.
9853 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
9854   SDValue Chain = LD->getChain();
9855   EVT VT = LD->getMemoryVT();
9856 
9857   SmallSet<SDNode *, 16> LoadRoots;
9858   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
9859   SmallSet<SDNode *, 16> Visited;
9860 
9861   // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done; otherwise, record all
9863   // nodes just above the top-level loads and token factors.
9864   while (!Queue.empty()) {
9865     SDNode *ChainNext = Queue.pop_back_val();
9866     if (!Visited.insert(ChainNext).second)
9867       continue;
9868 
9869     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
9870       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
9871         return true;
9872 
9873       if (!Visited.count(ChainLD->getChain().getNode()))
9874         Queue.push_back(ChainLD->getChain().getNode());
9875     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
9876       for (const SDUse &O : ChainNext->ops())
9877         if (!Visited.count(O.getNode()))
9878           Queue.push_back(O.getNode());
9879     } else
9880       LoadRoots.insert(ChainNext);
9881   }
9882 
9883   // Second, search down the chain, starting from the top-level nodes recorded
9884   // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
9886   // all loads (just the chain uses) and token factors to find a consecutive
9887   // load.
9888   Visited.clear();
9889   Queue.clear();
9890 
9891   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
9892        IE = LoadRoots.end(); I != IE; ++I) {
9893     Queue.push_back(*I);
9894 
9895     while (!Queue.empty()) {
9896       SDNode *LoadRoot = Queue.pop_back_val();
9897       if (!Visited.insert(LoadRoot).second)
9898         continue;
9899 
9900       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
9901         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
9902           return true;
9903 
9904       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
9905            UE = LoadRoot->use_end(); UI != UE; ++UI)
9906         if (((isa<MemSDNode>(*UI) &&
9907             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
9908             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
9909           Queue.push_back(*UI);
9910     }
9911   }
9912 
9913   return false;
9914 }
9915 
9916 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
9917                                                   DAGCombinerInfo &DCI) const {
9918   SelectionDAG &DAG = DCI.DAG;
9919   SDLoc dl(N);
9920 
9921   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
9922   // If we're tracking CR bits, we need to be careful that we don't have:
9923   //   trunc(binary-ops(zext(x), zext(y)))
9924   // or
9925   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
9926   // such that we're unnecessarily moving things into GPRs when it would be
9927   // better to keep them in CR bits.
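  // For example, trunc(and(zext(x), zext(y))) is better computed directly as
  // and(x, y) on the i1 (CR bit) values, avoiding the GPR round trip.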
9928 
9929   // Note that trunc here can be an actual i1 trunc, or can be the effective
9930   // truncation that comes from a setcc or select_cc.
9931   if (N->getOpcode() == ISD::TRUNCATE &&
9932       N->getValueType(0) != MVT::i1)
9933     return SDValue();
9934 
9935   if (N->getOperand(0).getValueType() != MVT::i32 &&
9936       N->getOperand(0).getValueType() != MVT::i64)
9937     return SDValue();
9938 
9939   if (N->getOpcode() == ISD::SETCC ||
9940       N->getOpcode() == ISD::SELECT_CC) {
9941     // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
9943     ISD::CondCode CC =
9944       cast<CondCodeSDNode>(N->getOperand(
9945         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
9946     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
9947 
9948     if (ISD::isSignedIntSetCC(CC)) {
9949       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
9950           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
9951         return SDValue();
9952     } else if (ISD::isUnsignedIntSetCC(CC)) {
9953       if (!DAG.MaskedValueIsZero(N->getOperand(0),
9954                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
9955           !DAG.MaskedValueIsZero(N->getOperand(1),
9956                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
9957         return SDValue();
9958     } else {
      // This is neither a signed nor an unsigned comparison; just make sure
9960       // that the high bits are equal.
9961       APInt Op1Zero, Op1One;
9962       APInt Op2Zero, Op2One;
9963       DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
9964       DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
9965 
9966       // We don't really care about what is known about the first bit (if
9967       // anything), so clear it in all masks prior to comparing them.
9968       Op1Zero.clearBit(0); Op1One.clearBit(0);
9969       Op2Zero.clearBit(0); Op2One.clearBit(0);
9970 
9971       if (Op1Zero != Op2Zero || Op1One != Op2One)
9972         return SDValue();
9973     }
9974   }
9975 
  // We now know that the higher-order bits are irrelevant; we just need to
9977   // make sure that all of the intermediate operations are bit operations, and
9978   // all inputs are extensions.
9979   if (N->getOperand(0).getOpcode() != ISD::AND &&
9980       N->getOperand(0).getOpcode() != ISD::OR  &&
9981       N->getOperand(0).getOpcode() != ISD::XOR &&
9982       N->getOperand(0).getOpcode() != ISD::SELECT &&
9983       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
9984       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
9985       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
9986       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
9987       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
9988     return SDValue();
9989 
9990   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
9991       N->getOperand(1).getOpcode() != ISD::AND &&
9992       N->getOperand(1).getOpcode() != ISD::OR  &&
9993       N->getOperand(1).getOpcode() != ISD::XOR &&
9994       N->getOperand(1).getOpcode() != ISD::SELECT &&
9995       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
9996       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
9997       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
9998       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
9999       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
10000     return SDValue();
10001 
10002   SmallVector<SDValue, 4> Inputs;
10003   SmallVector<SDValue, 8> BinOps, PromOps;
10004   SmallPtrSet<SDNode *, 16> Visited;
10005 
10006   for (unsigned i = 0; i < 2; ++i) {
10007     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10008           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10009           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10010           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10011         isa<ConstantSDNode>(N->getOperand(i)))
10012       Inputs.push_back(N->getOperand(i));
10013     else
10014       BinOps.push_back(N->getOperand(i));
10015 
10016     if (N->getOpcode() == ISD::TRUNCATE)
10017       break;
10018   }
10019 
10020   // Visit all inputs, collect all binary operations (and, or, xor and
10021   // select) that are all fed by extensions.
10022   while (!BinOps.empty()) {
10023     SDValue BinOp = BinOps.back();
10024     BinOps.pop_back();
10025 
10026     if (!Visited.insert(BinOp.getNode()).second)
10027       continue;
10028 
10029     PromOps.push_back(BinOp);
10030 
10031     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10032       // The condition of the select is not promoted.
10033       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10034         continue;
10035       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10036         continue;
10037 
10038       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10039             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10040             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10041            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10042           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10043         Inputs.push_back(BinOp.getOperand(i));
10044       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10045                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10046                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10047                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10048                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
10049                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10050                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10051                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10052                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
10053         BinOps.push_back(BinOp.getOperand(i));
10054       } else {
10055         // We have an input that is not an extension or another binary
10056         // operation; we'll abort this transformation.
10057         return SDValue();
10058       }
10059     }
10060   }
10061 
10062   // Make sure that this is a self-contained cluster of operations (which
10063   // is not quite the same thing as saying that everything has only one
10064   // use).
10065   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10066     if (isa<ConstantSDNode>(Inputs[i]))
10067       continue;
10068 
10069     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10070                               UE = Inputs[i].getNode()->use_end();
10071          UI != UE; ++UI) {
10072       SDNode *User = *UI;
10073       if (User != N && !Visited.count(User))
10074         return SDValue();
10075 
      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
10078       // FIXME: Although we could sometimes handle this, and it does occur in
10079       // practice that one of the condition inputs to the select is also one of
10080       // the outputs, we currently can't deal with this.
10081       if (User->getOpcode() == ISD::SELECT) {
10082         if (User->getOperand(0) == Inputs[i])
10083           return SDValue();
10084       } else if (User->getOpcode() == ISD::SELECT_CC) {
10085         if (User->getOperand(0) == Inputs[i] ||
10086             User->getOperand(1) == Inputs[i])
10087           return SDValue();
10088       }
10089     }
10090   }
10091 
10092   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10093     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10094                               UE = PromOps[i].getNode()->use_end();
10095          UI != UE; ++UI) {
10096       SDNode *User = *UI;
10097       if (User != N && !Visited.count(User))
10098         return SDValue();
10099 
      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
10102       // FIXME: Although we could sometimes handle this, and it does occur in
10103       // practice that one of the condition inputs to the select is also one of
10104       // the outputs, we currently can't deal with this.
10105       if (User->getOpcode() == ISD::SELECT) {
10106         if (User->getOperand(0) == PromOps[i])
10107           return SDValue();
10108       } else if (User->getOpcode() == ISD::SELECT_CC) {
10109         if (User->getOperand(0) == PromOps[i] ||
10110             User->getOperand(1) == PromOps[i])
10111           return SDValue();
10112       }
10113     }
10114   }
10115 
10116   // Replace all inputs with the extension operand.
10117   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // so we skip them here and instead replace them as the individual
    // operations are promoted below.
10120     if (isa<ConstantSDNode>(Inputs[i]))
10121       continue;
10122     else
10123       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
10124   }
10125 
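  // Keep the remaining promotions in HandleSDNodes so that the worklist
  // entries stay valid across the ReplaceAllUsesOfValueWith calls below,
  // which may CSE or delete nodes out from under us.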
10126   std::list<HandleSDNode> PromOpHandles;
10127   for (auto &PromOp : PromOps)
10128     PromOpHandles.emplace_back(PromOp);
10129 
10130   // Replace all operations (these are all the same, but have a different
10131   // (i1) return type). DAG.getNode will validate that the types of
10132   // a binary operator match, so go through the list in reverse so that
10133   // we've likely promoted both operands first. Any intermediate truncations or
10134   // extensions disappear.
10135   while (!PromOpHandles.empty()) {
10136     SDValue PromOp = PromOpHandles.back().getValue();
10137     PromOpHandles.pop_back();
10138 
10139     if (PromOp.getOpcode() == ISD::TRUNCATE ||
10140         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
10141         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
10142         PromOp.getOpcode() == ISD::ANY_EXTEND) {
10143       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
10144           PromOp.getOperand(0).getValueType() != MVT::i1) {
10145         // The operand is not yet ready (see comment below).
10146         PromOpHandles.emplace_front(PromOp);
10147         continue;
10148       }
10149 
10150       SDValue RepValue = PromOp.getOperand(0);
10151       if (isa<ConstantSDNode>(RepValue))
10152         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
10153 
10154       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
10155       continue;
10156     }
10157 
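    // C is the index of the first operand to promote: the condition of a
    // SELECT and the two comparison operands of a SELECT_CC are skipped.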
10158     unsigned C;
10159     switch (PromOp.getOpcode()) {
10160     default:             C = 0; break;
10161     case ISD::SELECT:    C = 1; break;
10162     case ISD::SELECT_CC: C = 2; break;
10163     }
10164 
10165     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10166          PromOp.getOperand(C).getValueType() != MVT::i1) ||
10167         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10168          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
10169       // The to-be-promoted operands of this node have not yet been
10170       // promoted (this should be rare because we're going through the
10171       // list backward, but if one of the operands has several users in
10172       // this cluster of to-be-promoted nodes, it is possible).
10173       PromOpHandles.emplace_front(PromOp);
10174       continue;
10175     }
10176 
10177     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10178                                 PromOp.getNode()->op_end());
10179 
10180     // If there are any constant inputs, make sure they're replaced now.
10181     for (unsigned i = 0; i < 2; ++i)
10182       if (isa<ConstantSDNode>(Ops[C+i]))
10183         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
10184 
10185     DAG.ReplaceAllUsesOfValueWith(PromOp,
10186       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
10187   }
10188 
10189   // Now we're left with the initial truncation itself.
10190   if (N->getOpcode() == ISD::TRUNCATE)
10191     return N->getOperand(0);
10192 
10193   // Otherwise, this is a comparison. The operands to be compared have just
10194   // changed type (to i1), but everything else is the same.
10195   return SDValue(N, 0);
10196 }
10197 
10198 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
10199                                                   DAGCombinerInfo &DCI) const {
10200   SelectionDAG &DAG = DCI.DAG;
10201   SDLoc dl(N);
10202 
10203   // If we're tracking CR bits, we need to be careful that we don't have:
10204   //   zext(binary-ops(trunc(x), trunc(y)))
10205   // or
10206   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
10207   // such that we're unnecessarily moving things into CR bits that can more
10208   // efficiently stay in GPRs. Note that if we're not certain that the high
10209   // bits are set as required by the final extension, we still may need to do
10210   // some masking to get the proper behavior.
10211 
10212   // This same functionality is important on PPC64 when dealing with
10213   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
10214   // the return values of functions. Because it is so similar, it is handled
10215   // here as well.
10216 
10217   if (N->getValueType(0) != MVT::i32 &&
10218       N->getValueType(0) != MVT::i64)
10219     return SDValue();
10220 
10221   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
10222         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
10223     return SDValue();
10224 
10225   if (N->getOperand(0).getOpcode() != ISD::AND &&
10226       N->getOperand(0).getOpcode() != ISD::OR  &&
10227       N->getOperand(0).getOpcode() != ISD::XOR &&
10228       N->getOperand(0).getOpcode() != ISD::SELECT &&
10229       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
10230     return SDValue();
10231 
10232   SmallVector<SDValue, 4> Inputs;
10233   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
10234   SmallPtrSet<SDNode *, 16> Visited;
10235 
10236   // Visit all inputs, collect all binary operations (and, or, xor and
10237   // select) that are all fed by truncations.
10238   while (!BinOps.empty()) {
10239     SDValue BinOp = BinOps.back();
10240     BinOps.pop_back();
10241 
10242     if (!Visited.insert(BinOp.getNode()).second)
10243       continue;
10244 
10245     PromOps.push_back(BinOp);
10246 
10247     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10248       // The condition of the select is not promoted.
10249       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10250         continue;
10251       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10252         continue;
10253 
10254       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10255           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10256         Inputs.push_back(BinOp.getOperand(i));
10257       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10258                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10259                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10260                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10261                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
10262         BinOps.push_back(BinOp.getOperand(i));
10263       } else {
10264         // We have an input that is not a truncation or another binary
10265         // operation; we'll abort this transformation.
10266         return SDValue();
10267       }
10268     }
10269   }
10270 
10271   // The operands of a select that must be truncated when the select is
10272   // promoted because the operand is actually part of the to-be-promoted set.
10273   DenseMap<SDNode *, EVT> SelectTruncOp[2];
10274 
10275   // Make sure that this is a self-contained cluster of operations (which
10276   // is not quite the same thing as saying that everything has only one
10277   // use).
10278   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10279     if (isa<ConstantSDNode>(Inputs[i]))
10280       continue;
10281 
10282     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10283                               UE = Inputs[i].getNode()->use_end();
10284          UI != UE; ++UI) {
10285       SDNode *User = *UI;
10286       if (User != N && !Visited.count(User))
10287         return SDValue();
10288 
      // If we're going to promote the non-output-value operand(s) of SELECT or
      // SELECT_CC, record them for truncation.
10291       if (User->getOpcode() == ISD::SELECT) {
10292         if (User->getOperand(0) == Inputs[i])
10293           SelectTruncOp[0].insert(std::make_pair(User,
10294                                     User->getOperand(0).getValueType()));
10295       } else if (User->getOpcode() == ISD::SELECT_CC) {
10296         if (User->getOperand(0) == Inputs[i])
10297           SelectTruncOp[0].insert(std::make_pair(User,
10298                                     User->getOperand(0).getValueType()));
10299         if (User->getOperand(1) == Inputs[i])
10300           SelectTruncOp[1].insert(std::make_pair(User,
10301                                     User->getOperand(1).getValueType()));
10302       }
10303     }
10304   }
10305 
10306   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10307     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10308                               UE = PromOps[i].getNode()->use_end();
10309          UI != UE; ++UI) {
10310       SDNode *User = *UI;
10311       if (User != N && !Visited.count(User))
10312         return SDValue();
10313 
      // If we're going to promote the non-output-value operand(s) of SELECT or
      // SELECT_CC, record them for truncation.
10316       if (User->getOpcode() == ISD::SELECT) {
10317         if (User->getOperand(0) == PromOps[i])
10318           SelectTruncOp[0].insert(std::make_pair(User,
10319                                     User->getOperand(0).getValueType()));
10320       } else if (User->getOpcode() == ISD::SELECT_CC) {
10321         if (User->getOperand(0) == PromOps[i])
10322           SelectTruncOp[0].insert(std::make_pair(User,
10323                                     User->getOperand(0).getValueType()));
10324         if (User->getOperand(1) == PromOps[i])
10325           SelectTruncOp[1].insert(std::make_pair(User,
10326                                     User->getOperand(1).getValueType()));
10327       }
10328     }
10329   }
10330 
10331   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
10332   bool ReallyNeedsExt = false;
10333   if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If any of the inputs is not already sign/zero extended to the final
    // width, then we'll still need to do that at the end.
10336     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10337       if (isa<ConstantSDNode>(Inputs[i]))
10338         continue;
10339 
10340       unsigned OpBits =
10341         Inputs[i].getOperand(0).getValueSizeInBits();
10342       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
10343 
10344       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
10345            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
10346                                   APInt::getHighBitsSet(OpBits,
10347                                                         OpBits-PromBits))) ||
10348           (N->getOpcode() == ISD::SIGN_EXTEND &&
10349            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
10350              (OpBits-(PromBits-1)))) {
10351         ReallyNeedsExt = true;
10352         break;
10353       }
10354     }
10355   }
10356 
10357   // Replace all inputs, either with the truncation operand, or a
10358   // truncation or extension to the final output type.
10359   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs are replaced within the to-be-promoted nodes that use
    // them (rather than here) because they might have users outside of the
    // cluster of promoted nodes.
10363     if (isa<ConstantSDNode>(Inputs[i]))
10364       continue;
10365 
10366     SDValue InSrc = Inputs[i].getOperand(0);
10367     if (Inputs[i].getValueType() == N->getValueType(0))
10368       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
10369     else if (N->getOpcode() == ISD::SIGN_EXTEND)
10370       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10371         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
10372     else if (N->getOpcode() == ISD::ZERO_EXTEND)
10373       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10374         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
10375     else
10376       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10377         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
10378   }
10379 
10380   std::list<HandleSDNode> PromOpHandles;
10381   for (auto &PromOp : PromOps)
10382     PromOpHandles.emplace_back(PromOp);
10383 
10384   // Replace all operations (these are all the same, but have a different
10385   // (promoted) return type). DAG.getNode will validate that the types of
10386   // a binary operator match, so go through the list in reverse so that
10387   // we've likely promoted both operands first.
10388   while (!PromOpHandles.empty()) {
10389     SDValue PromOp = PromOpHandles.back().getValue();
10390     PromOpHandles.pop_back();
10391 
10392     unsigned C;
10393     switch (PromOp.getOpcode()) {
10394     default:             C = 0; break;
10395     case ISD::SELECT:    C = 1; break;
10396     case ISD::SELECT_CC: C = 2; break;
10397     }
10398 
10399     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10400          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
10401         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10402          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
10403       // The to-be-promoted operands of this node have not yet been
10404       // promoted (this should be rare because we're going through the
10405       // list backward, but if one of the operands has several users in
10406       // this cluster of to-be-promoted nodes, it is possible).
10407       PromOpHandles.emplace_front(PromOp);
10408       continue;
10409     }
10410 
10411     // For SELECT and SELECT_CC nodes, we do a similar check for any
10412     // to-be-promoted comparison inputs.
10413     if (PromOp.getOpcode() == ISD::SELECT ||
10414         PromOp.getOpcode() == ISD::SELECT_CC) {
10415       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
10416            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
10417           (SelectTruncOp[1].count(PromOp.getNode()) &&
10418            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
10419         PromOpHandles.emplace_front(PromOp);
10420         continue;
10421       }
10422     }
10423 
10424     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10425                                 PromOp.getNode()->op_end());
10426 
10427     // If this node has constant inputs, then they'll need to be promoted here.
10428     for (unsigned i = 0; i < 2; ++i) {
10429       if (!isa<ConstantSDNode>(Ops[C+i]))
10430         continue;
10431       if (Ops[C+i].getValueType() == N->getValueType(0))
10432         continue;
10433 
10434       if (N->getOpcode() == ISD::SIGN_EXTEND)
10435         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10436       else if (N->getOpcode() == ISD::ZERO_EXTEND)
10437         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10438       else
10439         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10440     }
10441 
10442     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
10443     // truncate them again to the original value type.
10444     if (PromOp.getOpcode() == ISD::SELECT ||
10445         PromOp.getOpcode() == ISD::SELECT_CC) {
10446       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
10447       if (SI0 != SelectTruncOp[0].end())
10448         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
10449       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
10450       if (SI1 != SelectTruncOp[1].end())
10451         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
10452     }
10453 
10454     DAG.ReplaceAllUsesOfValueWith(PromOp,
10455       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
10456   }
10457 
10458   // Now we're left with the initial extension itself.
10459   if (!ReallyNeedsExt)
10460     return N->getOperand(0);
10461 
10462   // To zero extend, just mask off everything except for the first bit (in the
10463   // i1 case).
10464   if (N->getOpcode() == ISD::ZERO_EXTEND)
10465     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
10466                        DAG.getConstant(APInt::getLowBitsSet(
10467                                          N->getValueSizeInBits(0), PromBits),
10468                                        dl, N->getValueType(0)));
10469 
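  // Otherwise sign extend: shift left so that the promoted value's sign bit
  // lands in the MSB, then arithmetic-shift it back down.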
10470   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
10471          "Invalid extension type");
10472   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
10473   SDValue ShiftCst =
10474       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
10475   return DAG.getNode(
10476       ISD::SRA, dl, N->getValueType(0),
10477       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
10478       ShiftCst);
10479 }
10480 
10481 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
10482                                                  DAGCombinerInfo &DCI) const {
10483   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
10484          "Should be called with a BUILD_VECTOR node");
10485 
10486   SelectionDAG &DAG = DCI.DAG;
10487   SDLoc dl(N);
10488   if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX())
10489     return SDValue();
10490 
10491   // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
10493   if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP &&
10494       N->getOperand(0).getOpcode() != ISD::UINT_TO_FP)
10495     return SDValue();
10496   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
10497       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
10498     return SDValue();
10499   if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
10500     return SDValue();
10501 
10502   SDValue Ext1 = N->getOperand(0).getOperand(0);
10503   SDValue Ext2 = N->getOperand(1).getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
10506     return SDValue();
10507 
10508   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
10509   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
10510   if (!Ext1Op || !Ext2Op)
10511     return SDValue();
  if (Ext1.getValueType() != MVT::i32 ||
      Ext2.getValueType() != MVT::i32)
    return SDValue();

  if (Ext1.getOperand(0) != Ext2.getOperand(0))
10515     return SDValue();
10516 
10517   int FirstElem = Ext1Op->getZExtValue();
10518   int SecondElem = Ext2Op->getZExtValue();
10519   int SubvecIdx;
10520   if (FirstElem == 0 && SecondElem == 1)
10521     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
10522   else if (FirstElem == 2 && SecondElem == 3)
10523     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
10524   else
10525     return SDValue();
10526 
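  // Both extracts read adjacent lanes of the same source vector, so we can
  // emit a single [SU]INT_VEC_TO_FP on the selected half of that vector
  // instead of two scalar conversions.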
10527   SDValue SrcVec = Ext1.getOperand(0);
10528   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
10529     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
10530   return DAG.getNode(NodeType, dl, MVT::v2f64,
10531                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
10532 }
10533 
10534 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
10535                                               DAGCombinerInfo &DCI) const {
10536   assert((N->getOpcode() == ISD::SINT_TO_FP ||
10537           N->getOpcode() == ISD::UINT_TO_FP) &&
10538          "Need an int -> FP conversion node here");
10539 
10540   if (useSoftFloat() || !Subtarget.has64BitSupport())
10541     return SDValue();
10542 
10543   SelectionDAG &DAG = DCI.DAG;
10544   SDLoc dl(N);
10545   SDValue Op(N, 0);
10546 
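  // With Power9 vector support, an i8/i16 value can be loaded directly into a
  // VSX register and converted there (LXSIZX plus, for signed values, VEXTS),
  // avoiding a separate integer load and transfer into the FP registers.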
10547   SDValue FirstOperand(Op.getOperand(0));
10548   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
10549     (FirstOperand.getValueType() == MVT::i8 ||
10550      FirstOperand.getValueType() == MVT::i16);
10551   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
10552     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
10553     bool DstDouble = Op.getValueType() == MVT::f64;
10554     unsigned ConvOp = Signed ?
10555       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
10556       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
10557     SDValue WidthConst =
10558       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
10559                             dl, false);
10560     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
10561     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
10562     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
10563                                          DAG.getVTList(MVT::f64, MVT::Other),
10564                                          Ops, MVT::i8, LDN->getMemOperand());
10565 
10566     // For signed conversion, we need to sign-extend the value in the VSR
10567     if (Signed) {
10568       SDValue ExtOps[] = { Ld, WidthConst };
10569       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
10570       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
10571     } else
10572       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
10573   }
10574 
10575   // Don't handle ppc_fp128 here or i1 conversions.
10576   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
10577     return SDValue();
10578   if (Op.getOperand(0).getValueType() == MVT::i1)
10579     return SDValue();
10580 
10581   // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
10583   // scalar instructions, we have no method for zero- or sign-extending the
10584   // value. Thus, we cannot handle i32 intermediate values here.
10585   if (Op.getOperand(0).getValueType() == MVT::i32)
10586     return SDValue();
10587 
10588   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
10589          "UINT_TO_FP is supported only with FPCVT");
10590 
10591   // If we have FCFIDS, then use it when converting to single-precision.
10592   // Otherwise, convert to double-precision and then round.
10593   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
10594                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
10595                                                             : PPCISD::FCFIDS)
10596                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
10597                                                             : PPCISD::FCFID);
10598   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
10599                   ? MVT::f32
10600                   : MVT::f64;
10601 
  // If we're converting from a float to an int and back to a float again,
  // then we don't need the store/load pair at all.
10604   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
10605        Subtarget.hasFPCVT()) ||
10606       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
10607     SDValue Src = Op.getOperand(0).getOperand(0);
10608     if (Src.getValueType() == MVT::f32) {
10609       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
10610       DCI.AddToWorklist(Src.getNode());
10611     } else if (Src.getValueType() != MVT::f64) {
10612       // Make sure that we don't pick up a ppc_fp128 source value.
10613       return SDValue();
10614     }
10615 
10616     unsigned FCTOp =
10617       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
10618                                                         PPCISD::FCTIDUZ;
10619 
10620     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
10621     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
10622 
10623     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
10624       FP = DAG.getNode(ISD::FP_ROUND, dl,
10625                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
10626       DCI.AddToWorklist(FP.getNode());
10627     }
10628 
10629     return FP;
10630   }
10631 
10632   return SDValue();
10633 }
10634 
10635 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
10636 // builtins) into loads with swaps.
10637 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
10638                                               DAGCombinerInfo &DCI) const {
10639   SelectionDAG &DAG = DCI.DAG;
10640   SDLoc dl(N);
10641   SDValue Chain;
10642   SDValue Base;
10643   MachineMemOperand *MMO;
10644 
10645   switch (N->getOpcode()) {
10646   default:
10647     llvm_unreachable("Unexpected opcode for little endian VSX load");
10648   case ISD::LOAD: {
10649     LoadSDNode *LD = cast<LoadSDNode>(N);
10650     Chain = LD->getChain();
10651     Base = LD->getBasePtr();
10652     MMO = LD->getMemOperand();
10653     // If the MMO suggests this isn't a load of a full vector, leave
10654     // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem, that will be a bug.
10656     if (MMO->getSize() < 16)
10657       return SDValue();
10658     break;
10659   }
10660   case ISD::INTRINSIC_W_CHAIN: {
10661     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
10662     Chain = Intrin->getChain();
10663     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
10664     // us what we want. Get operand 2 instead.
10665     Base = Intrin->getOperand(2);
10666     MMO = Intrin->getMemOperand();
10667     break;
10668   }
10669   }
10670 
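  // Emit the load as an LXVD2X, which loads the doublewords in big-endian
  // element order, followed by an XXSWAPD to restore the expected
  // little-endian element order.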
10671   MVT VecTy = N->getValueType(0).getSimpleVT();
10672   SDValue LoadOps[] = { Chain, Base };
10673   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
10674                                          DAG.getVTList(MVT::v2f64, MVT::Other),
10675                                          LoadOps, MVT::v2f64, MMO);
10676 
10677   DCI.AddToWorklist(Load.getNode());
10678   Chain = Load.getValue(1);
10679   SDValue Swap = DAG.getNode(
10680       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
10681   DCI.AddToWorklist(Swap.getNode());
10682 
10683   // Add a bitcast if the resulting load type doesn't match v2f64.
10684   if (VecTy != MVT::v2f64) {
10685     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
10686     DCI.AddToWorklist(N.getNode());
10687     // Package {bitcast value, swap's chain} to match Load's shape.
10688     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
10689                        N, Swap.getValue(1));
10690   }
10691 
10692   return Swap;
10693 }
10694 
10695 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
10696 // builtins) into stores with swaps.
10697 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
10698                                                DAGCombinerInfo &DCI) const {
10699   SelectionDAG &DAG = DCI.DAG;
10700   SDLoc dl(N);
10701   SDValue Chain;
10702   SDValue Base;
10703   unsigned SrcOpnd;
10704   MachineMemOperand *MMO;
10705 
10706   switch (N->getOpcode()) {
10707   default:
10708     llvm_unreachable("Unexpected opcode for little endian VSX store");
10709   case ISD::STORE: {
10710     StoreSDNode *ST = cast<StoreSDNode>(N);
10711     Chain = ST->getChain();
10712     Base = ST->getBasePtr();
10713     MMO = ST->getMemOperand();
10714     SrcOpnd = 1;
10715     // If the MMO suggests this isn't a store of a full vector, leave
10716     // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem, that will be a bug.
10718     if (MMO->getSize() < 16)
10719       return SDValue();
10720     break;
10721   }
10722   case ISD::INTRINSIC_VOID: {
10723     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
10724     Chain = Intrin->getChain();
10725     // Intrin->getBasePtr() oddly does not get what we want.
10726     Base = Intrin->getOperand(3);
10727     MMO = Intrin->getMemOperand();
10728     SrcOpnd = 2;
10729     break;
10730   }
10731   }
10732 
10733   SDValue Src = N->getOperand(SrcOpnd);
10734   MVT VecTy = Src.getValueType().getSimpleVT();
10735 
  // All stores are done as v2f64, with a bitcast beforehand if needed.
10737   if (VecTy != MVT::v2f64) {
10738     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
10739     DCI.AddToWorklist(Src.getNode());
10740   }
10741 
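  // Swap the doublewords into the element order STXVD2X expects and emit the
  // store, threading the swap's chain into the store node.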
10742   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
10743                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
10744   DCI.AddToWorklist(Swap.getNode());
10745   Chain = Swap.getValue(1);
10746   SDValue StoreOps[] = { Chain, Swap, Base };
10747   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
10748                                           DAG.getVTList(MVT::Other),
10749                                           StoreOps, VecTy, MMO);
10750   DCI.AddToWorklist(Store.getNode());
10751   return Store;
10752 }
10753 
10754 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
10755                                              DAGCombinerInfo &DCI) const {
10756   SelectionDAG &DAG = DCI.DAG;
10757   SDLoc dl(N);
10758   switch (N->getOpcode()) {
10759   default: break;
10760   case PPCISD::SHL:
10761     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
10762         return N->getOperand(0);
10763     break;
10764   case PPCISD::SRL:
10765     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
10766         return N->getOperand(0);
10767     break;
10768   case PPCISD::SRA:
10769     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
10770       if (C->isNullValue() ||   //  0 >>s V -> 0.
10771           C->isAllOnesValue())    // -1 >>s V -> -1.
10772         return N->getOperand(0);
10773     }
10774     break;
10775   case ISD::SIGN_EXTEND:
10776   case ISD::ZERO_EXTEND:
10777   case ISD::ANY_EXTEND:
10778     return DAGCombineExtBoolTrunc(N, DCI);
10779   case ISD::TRUNCATE:
10780   case ISD::SETCC:
10781   case ISD::SELECT_CC:
10782     return DAGCombineTruncBoolExt(N, DCI);
10783   case ISD::SINT_TO_FP:
10784   case ISD::UINT_TO_FP:
10785     return combineFPToIntToFP(N, DCI);
10786   case ISD::STORE: {
10787     EVT Op1VT = N->getOperand(1).getValueType();
10788     bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
10789       (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
10790 
10791     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
10792     if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
10793         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
10794         ValidTypeForStoreFltAsInt &&
10795         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
10796       SDValue Val = N->getOperand(1).getOperand(0);
10797       if (Val.getValueType() == MVT::f32) {
10798         Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
10799         DCI.AddToWorklist(Val.getNode());
10800       }
10801       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
10802       DCI.AddToWorklist(Val.getNode());
10803 
10804       if (Op1VT == MVT::i32) {
10805         SDValue Ops[] = {
10806           N->getOperand(0), Val, N->getOperand(2),
10807           DAG.getValueType(N->getOperand(1).getValueType())
10808         };
10809 
10810         Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
10811                 DAG.getVTList(MVT::Other), Ops,
10812                 cast<StoreSDNode>(N)->getMemoryVT(),
10813                 cast<StoreSDNode>(N)->getMemOperand());
10814       } else {
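        // Sub-word (i8/i16) values are stored through STXSIX instead; per
        // ValidTypeForStoreFltAsInt above, this path requires P9 vector
        // support.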
10815         unsigned WidthInBytes =
10816           N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
10817         SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
10818 
10819         SDValue Ops[] = {
10820           N->getOperand(0), Val, N->getOperand(2), WidthConst,
10821           DAG.getValueType(N->getOperand(1).getValueType())
10822         };
10823         Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
10824                                       DAG.getVTList(MVT::Other), Ops,
10825                                       cast<StoreSDNode>(N)->getMemoryVT(),
10826                                       cast<StoreSDNode>(N)->getMemOperand());
10827       }
10828 
10829       DCI.AddToWorklist(Val.getNode());
10830       return Val;
10831     }
10832 
10833     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
10834     if (cast<StoreSDNode>(N)->isUnindexed() &&
10835         N->getOperand(1).getOpcode() == ISD::BSWAP &&
10836         N->getOperand(1).getNode()->hasOneUse() &&
10837         (N->getOperand(1).getValueType() == MVT::i32 ||
10838          N->getOperand(1).getValueType() == MVT::i16 ||
10839          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
10840           N->getOperand(1).getValueType() == MVT::i64))) {
10841       SDValue BSwapOp = N->getOperand(1).getOperand(0);
10842       // Do an any-extend to 32-bits if this is a half-word input.
10843       if (BSwapOp.getValueType() == MVT::i16)
10844         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
10845 
10846       SDValue Ops[] = {
10847         N->getOperand(0), BSwapOp, N->getOperand(2),
10848         DAG.getValueType(N->getOperand(1).getValueType())
10849       };
10850       return
10851         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
10852                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
10853                                 cast<StoreSDNode>(N)->getMemOperand());
10854     }
10855 
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
10857     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
10858     EVT VT = N->getOperand(1).getValueType();
10859     if (VT.isSimple()) {
10860       MVT StoreVT = VT.getSimpleVT();
10861       if (Subtarget.needsSwapsForVSXMemOps() &&
10862           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
10863            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
10864         return expandVSXStoreForLE(N, DCI);
10865     }
10866     break;
10867   }
10868   case ISD::LOAD: {
10869     LoadSDNode *LD = cast<LoadSDNode>(N);
10870     EVT VT = LD->getValueType(0);
10871 
10872     // For little endian, VSX loads require generating lxvd2x/xxswapd.
10873     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
10874     if (VT.isSimple()) {
10875       MVT LoadVT = VT.getSimpleVT();
10876       if (Subtarget.needsSwapsForVSXMemOps() &&
10877           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
10878            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
10879         return expandVSXLoadForLE(N, DCI);
10880     }
10881 
10882     // We sometimes end up with a 64-bit integer load, from which we extract
10883     // two single-precision floating-point numbers. This happens with
10884     // std::complex<float>, and other similar structures, because of the way we
10885     // canonicalize structure copies. However, if we lack direct moves,
10886     // then the final bitcasts from the extracted integer values to the
10887     // floating-point numbers turn into store/load pairs. Even with direct moves,
10888     // just loading the two floating-point numbers is likely better.
10889     auto ReplaceTwoFloatLoad = [&]() {
10890       if (VT != MVT::i64)
10891         return false;
10892 
10893       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
10894           LD->isVolatile())
10895         return false;
10896 
10897       //  We're looking for a sequence like this:
10898       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
10899       //      t16: i64 = srl t13, Constant:i32<32>
10900       //    t17: i32 = truncate t16
10901       //  t18: f32 = bitcast t17
10902       //    t19: i32 = truncate t13
10903       //  t20: f32 = bitcast t19
10904 
10905       if (!LD->hasNUsesOfValue(2, 0))
10906         return false;
10907 
10908       auto UI = LD->use_begin();
10909       while (UI.getUse().getResNo() != 0) ++UI;
10910       SDNode *Trunc = *UI++;
10911       while (UI.getUse().getResNo() != 0) ++UI;
10912       SDNode *RightShift = *UI;
10913       if (Trunc->getOpcode() != ISD::TRUNCATE)
10914         std::swap(Trunc, RightShift);
10915 
10916       if (Trunc->getOpcode() != ISD::TRUNCATE ||
10917           Trunc->getValueType(0) != MVT::i32 ||
10918           !Trunc->hasOneUse())
10919         return false;
10920       if (RightShift->getOpcode() != ISD::SRL ||
10921           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
10922           RightShift->getConstantOperandVal(1) != 32 ||
10923           !RightShift->hasOneUse())
10924         return false;
10925 
10926       SDNode *Trunc2 = *RightShift->use_begin();
10927       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
10928           Trunc2->getValueType(0) != MVT::i32 ||
10929           !Trunc2->hasOneUse())
10930         return false;
10931 
10932       SDNode *Bitcast = *Trunc->use_begin();
10933       SDNode *Bitcast2 = *Trunc2->use_begin();
10934 
10935       if (Bitcast->getOpcode() != ISD::BITCAST ||
10936           Bitcast->getValueType(0) != MVT::f32)
10937         return false;
10938       if (Bitcast2->getOpcode() != ISD::BITCAST ||
10939           Bitcast2->getValueType(0) != MVT::f32)
10940         return false;
10941 
10942       if (Subtarget.isLittleEndian())
10943         std::swap(Bitcast, Bitcast2);
10944 
10945       // Bitcast has the second float (in memory-layout order) and Bitcast2
10946       // has the first one.
10947 
10948       SDValue BasePtr = LD->getBasePtr();
10949       if (LD->isIndexed()) {
10950         assert(LD->getAddressingMode() == ISD::PRE_INC &&
10951                "Non-pre-inc AM on PPC?");
10952         BasePtr =
10953           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10954                       LD->getOffset());
10955       }
10956 
10957       auto MMOFlags =
10958           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
10959       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
10960                                       LD->getPointerInfo(), LD->getAlignment(),
10961                                       MMOFlags, LD->getAAInfo());
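      // The second f32 sits four bytes past the first in memory-layout order.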
10962       SDValue AddPtr =
10963         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
10964                     BasePtr, DAG.getIntPtrConstant(4, dl));
10965       SDValue FloatLoad2 = DAG.getLoad(
10966           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
10967           LD->getPointerInfo().getWithOffset(4),
10968           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
10969 
10970       if (LD->isIndexed()) {
10971         // Note that DAGCombine should re-form any pre-increment load(s) from
10972         // what is produced here if that makes sense.
10973         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
10974       }
10975 
10976       DCI.CombineTo(Bitcast2, FloatLoad);
10977       DCI.CombineTo(Bitcast, FloatLoad2);
10978 
10979       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
10980                                     SDValue(FloatLoad2.getNode(), 1));
10981       return true;
10982     };
10983 
10984     if (ReplaceTwoFloatLoad())
10985       return SDValue(N, 0);
10986 
10987     EVT MemVT = LD->getMemoryVT();
10988     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
10989     unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
10990     Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
10991     unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
10992     if (LD->isUnindexed() && VT.isVector() &&
10993         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
10994           // P8 and later hardware should just use LOAD.
10995           !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
10996                                        VT == MVT::v4i32 || VT == MVT::v4f32)) ||
10997          (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
10998           LD->getAlignment() >= ScalarABIAlignment)) &&
10999         LD->getAlignment() < ABIAlignment) {
11000       // This is a type-legal unaligned Altivec or QPX load.
11001       SDValue Chain = LD->getChain();
11002       SDValue Ptr = LD->getBasePtr();
11003       bool isLittleEndian = Subtarget.isLittleEndian();
11004 
11005       // This implements the loading of unaligned vectors as described in
11006       // the venerable Apple Velocity Engine overview. Specifically:
11007       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
11008       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
11009       //
11010       // The general idea is to expand a sequence of one or more unaligned
11011       // loads into an alignment-based permutation-control instruction (lvsl
11012       // or lvsr), a series of regular vector loads (which always truncate
11013       // their input address to an aligned address), and a series of
11014       // permutations.  The results of these permutations are the requested
11015       // loaded values.  The trick is that the last "extra" load is not taken
11016       // from the address you might suspect (sizeof(vector) bytes after the
11017       // last requested load), but rather sizeof(vector) - 1 bytes after the
11018       // last requested vector. The point of this is to avoid a page fault if
11019       // the base address happened to be aligned. This works because if the
11020       // base address is aligned, then adding less than a full vector length
11021       // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched from the address you would
      // expect.
11024 
11025       // We might be able to reuse the permutation generation from
11026       // a different base address offset from this one by an aligned amount.
11027       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
11028       // optimization later.
11029       Intrinsic::ID Intr, IntrLD, IntrPerm;
11030       MVT PermCntlTy, PermTy, LDTy;
11031       if (Subtarget.hasAltivec()) {
11032         Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
11033                                  Intrinsic::ppc_altivec_lvsl;
11034         IntrLD = Intrinsic::ppc_altivec_lvx;
11035         IntrPerm = Intrinsic::ppc_altivec_vperm;
11036         PermCntlTy = MVT::v16i8;
11037         PermTy = MVT::v4i32;
11038         LDTy = MVT::v4i32;
11039       } else {
11040         Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
11041                                        Intrinsic::ppc_qpx_qvlpcls;
11042         IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
11043                                        Intrinsic::ppc_qpx_qvlfs;
11044         IntrPerm = Intrinsic::ppc_qpx_qvfperm;
11045         PermCntlTy = MVT::v4f64;
11046         PermTy = MVT::v4f64;
11047         LDTy = MemVT.getSimpleVT();
11048       }
11049 
11050       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
11051 
11052       // Create the new MMO for the new base load. It is like the original MMO,
11053       // but represents an area in memory almost twice the vector size centered
11054       // on the original address. If the address is unaligned, we might start
11055       // reading up to (sizeof(vector)-1) bytes below the address of the
11056       // original unaligned load.
11057       MachineFunction &MF = DAG.getMachineFunction();
11058       MachineMemOperand *BaseMMO =
11059         MF.getMachineMemOperand(LD->getMemOperand(),
11060                                 -(long)MemVT.getStoreSize()+1,
11061                                 2*MemVT.getStoreSize()-1);
11062 
11063       // Create the new base load.
11064       SDValue LDXIntID =
11065           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
11066       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
11067       SDValue BaseLoad =
11068         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11069                                 DAG.getVTList(PermTy, MVT::Other),
11070                                 BaseLoadOps, LDTy, BaseMMO);
11071 
11072       // Note that the value of IncOffset (which is provided to the next
11073       // load's pointer info offset value, and thus used to calculate the
11074       // alignment), and the value of IncValue (which is actually used to
11075       // increment the pointer value) are different! This is because we
11076       // require the next load to appear to be aligned, even though it
11077       // is actually offset from the base pointer by a lesser amount.
11078       int IncOffset = VT.getSizeInBits() / 8;
11079       int IncValue = IncOffset;
11080 
11081       // Walk (both up and down) the chain looking for another load at the real
11082       // (aligned) offset (the alignment of the other load does not matter in
11083       // this case). If found, then do not use the offset reduction trick, as
11084       // that will prevent the loads from being later combined (as they would
11085       // otherwise be duplicates).
11086       if (!findConsecutiveLoad(LD, DAG))
11087         --IncValue;
11088 
11089       SDValue Increment =
11090           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
11091       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
11092 
11093       MachineMemOperand *ExtraMMO =
11094         MF.getMachineMemOperand(LD->getMemOperand(),
11095                                 1, 2*MemVT.getStoreSize()-1);
11096       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
11097       SDValue ExtraLoad =
11098         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11099                                 DAG.getVTList(PermTy, MVT::Other),
11100                                 ExtraLoadOps, LDTy, ExtraMMO);
11101 
11102       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
11103         BaseLoad.getValue(1), ExtraLoad.getValue(1));
11104 
11105       // Because vperm has a big-endian bias, we must reverse the order
11106       // of the input vectors and complement the permute control vector
11107       // when generating little endian code.  We have already handled the
11108       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
11109       // and ExtraLoad here.
11110       SDValue Perm;
11111       if (isLittleEndian)
11112         Perm = BuildIntrinsicOp(IntrPerm,
11113                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
11114       else
11115         Perm = BuildIntrinsicOp(IntrPerm,
11116                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
11117 
11118       if (VT != PermTy)
11119         Perm = Subtarget.hasAltivec() ?
11120                  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
11121                  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
11122                                DAG.getTargetConstant(1, dl, MVT::i64));
11123                                // second argument is 1 because this rounding
11124                                // is always exact.
11125 
11126       // The output of the permutation is our loaded result, the TokenFactor is
11127       // our new chain.
11128       DCI.CombineTo(N, Perm, TF);
11129       return SDValue(N, 0);
11130     }
  }
  break;
  case ISD::INTRINSIC_WO_CHAIN: {
11134       bool isLittleEndian = Subtarget.isLittleEndian();
11135       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
11136       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
11137                                            : Intrinsic::ppc_altivec_lvsl);
11138       if ((IID == Intr ||
11139            IID == Intrinsic::ppc_qpx_qvlpcld  ||
11140            IID == Intrinsic::ppc_qpx_qvlpcls) &&
11141         N->getOperand(1)->getOpcode() == ISD::ADD) {
11142         SDValue Add = N->getOperand(1);
11143 
11144         int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
11145                    5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
11146 
11147         if (DAG.MaskedValueIsZero(Add->getOperand(1),
11148                                   APInt::getAllOnesValue(Bits /* alignment */)
11149                                       .zext(Add.getScalarValueSizeInBits()))) {
11150           SDNode *BasePtr = Add->getOperand(0).getNode();
11151           for (SDNode::use_iterator UI = BasePtr->use_begin(),
11152                                     UE = BasePtr->use_end();
11153                UI != UE; ++UI) {
11154             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
11155                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
11156               // We've found another LVSL/LVSR, and this address is an aligned
11157               // multiple of that one. The results will be the same, so use the
11158               // one we've just found instead.
11159 
11160               return SDValue(*UI, 0);
11161             }
11162           }
11163         }
11164 
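        // Similarly, if this address is a constant offset from another
        // address and the two offsets differ by a multiple of the required
        // alignment, then any lvsl/lvsr (or qvlpc*) already computed from the
        // other address yields the same control vector, so reuse it.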
11165         if (isa<ConstantSDNode>(Add->getOperand(1))) {
11166           SDNode *BasePtr = Add->getOperand(0).getNode();
11167           for (SDNode::use_iterator UI = BasePtr->use_begin(),
11168                UE = BasePtr->use_end(); UI != UE; ++UI) {
11169             if (UI->getOpcode() == ISD::ADD &&
11170                 isa<ConstantSDNode>(UI->getOperand(1)) &&
11171                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
11172                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
11173                 (1ULL << Bits) == 0) {
11174               SDNode *OtherAdd = *UI;
11175               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
11176                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
11177                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
11178                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
11179                   return SDValue(*VI, 0);
11180                 }
11181               }
11182             }
11183           }
11184         }
11185       }
  }
  break;
11189   case ISD::INTRINSIC_W_CHAIN: {
11190     // For little endian, VSX loads require generating lxvd2x/xxswapd.
11191     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
11192     if (Subtarget.needsSwapsForVSXMemOps()) {
11193       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11194       default:
11195         break;
11196       case Intrinsic::ppc_vsx_lxvw4x:
11197       case Intrinsic::ppc_vsx_lxvd2x:
11198         return expandVSXLoadForLE(N, DCI);
11199       }
11200     }
11201     break;
11202   }
11203   case ISD::INTRINSIC_VOID: {
11204     // For little endian, VSX stores require generating xxswapd/stxvd2x.
11205     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
11206     if (Subtarget.needsSwapsForVSXMemOps()) {
11207       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11208       default:
11209         break;
11210       case Intrinsic::ppc_vsx_stxvw4x:
11211       case Intrinsic::ppc_vsx_stxvd2x:
11212         return expandVSXStoreForLE(N, DCI);
11213       }
11214     }
11215     break;
11216   }
11217   case ISD::BSWAP:
11218     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
11219     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
11220         N->getOperand(0).hasOneUse() &&
11221         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
11222          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
11223           N->getValueType(0) == MVT::i64))) {
11224       SDValue Load = N->getOperand(0);
11225       LoadSDNode *LD = cast<LoadSDNode>(Load);
11226       // Create the byte-swapping load.
11227       SDValue Ops[] = {
11228         LD->getChain(),    // Chain
11229         LD->getBasePtr(),  // Ptr
11230         DAG.getValueType(N->getValueType(0)) // VT
11231       };
11232       SDValue BSLoad =
11233         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
11234                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
11235                                               MVT::i64 : MVT::i32, MVT::Other),
11236                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
11237 
11238       // If this is an i16 load, insert the truncate.
11239       SDValue ResVal = BSLoad;
11240       if (N->getValueType(0) == MVT::i16)
11241         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
11242 
11243       // First, combine the bswap away.  This makes the value produced by the
11244       // load dead.
11245       DCI.CombineTo(N, ResVal);
11246 
11247       // Next, combine the load away, we give it a bogus result value but a real
11248       // chain result.  The result value is dead because the bswap is dead.
11249       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
11250 
11251       // Return N so it doesn't get rechecked!
11252       return SDValue(N, 0);
11253     }
11254 
11255     break;
11256   case PPCISD::VCMP: {
11257     // If a VCMPo node already exists with exactly the same operands as this
11258     // node, use its result instead of this node (VCMPo computes both a CR6 and
11259     // a normal output).
11260     //
11261     if (!N->getOperand(0).hasOneUse() &&
11262         !N->getOperand(1).hasOneUse() &&
11263         !N->getOperand(2).hasOneUse()) {
11264 
11265       // Scan all of the users of the LHS, looking for VCMPo's that match.
11266       SDNode *VCMPoNode = nullptr;
11267 
11268       SDNode *LHSN = N->getOperand(0).getNode();
11269       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
11270            UI != E; ++UI)
11271         if (UI->getOpcode() == PPCISD::VCMPo &&
11272             UI->getOperand(1) == N->getOperand(1) &&
11273             UI->getOperand(2) == N->getOperand(2) &&
11274             UI->getOperand(0) == N->getOperand(0)) {
11275           VCMPoNode = *UI;
11276           break;
11277         }
11278 
11279       // If there is no VCMPo node, or if its flag result (value #1) is unused,
11280       // don't transform this.
11281       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
11282         break;
11283 
11284       // Look at the (necessarily single) use of the flag value.  If it has a
11285       // chain, this transformation is more complex.  Note that multiple things
11286       // could use the value result, which we should ignore.
11287       SDNode *FlagUser = nullptr;
11288       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
11289            FlagUser == nullptr; ++UI) {
11290         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
11291         SDNode *User = *UI;
11292         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
11293           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
11294             FlagUser = User;
11295             break;
11296           }
11297         }
11298       }
11299 
11300       // If the user is a MFOCRF instruction, we know this is safe.
11301       // Otherwise we give up for right now.
11302       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
11303         return SDValue(VCMPoNode, 0);
11304     }
11305     break;
11306   }
11307   case ISD::BRCOND: {
11308     SDValue Cond = N->getOperand(1);
11309     SDValue Target = N->getOperand(2);
11310 
11311     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11312         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
11313           Intrinsic::ppc_is_decremented_ctr_nonzero) {
11314 
11315       // We now need to make the intrinsic dead (it cannot be instruction
11316       // selected).
11317       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
11318       assert(Cond.getNode()->hasOneUse() &&
11319              "Counter decrement has more than one use");
11320 
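      // PPCISD::BDNZ corresponds to the bdnz instruction: decrement CTR and
      // branch to the target if the new counter value is non-zero.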
11321       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
11322                          N->getOperand(0), Target);
11323     }
11324   }
11325   break;
11326   case ISD::BR_CC: {
11327     // If this is a branch on an altivec predicate comparison, lower this so
11328     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
11329     // lowering is done pre-legalize, because the legalizer lowers the predicate
11330     // compare down to code that is difficult to reassemble.
11331     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
11332     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
11333 
11334     // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
11335     // value. If so, look through the AND to get to the intrinsic.
11336     if (LHS.getOpcode() == ISD::AND &&
11337         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11338         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
11339           Intrinsic::ppc_is_decremented_ctr_nonzero &&
11340         isa<ConstantSDNode>(LHS.getOperand(1)) &&
11341         !isNullConstant(LHS.getOperand(1)))
11342       LHS = LHS.getOperand(0);
11343 
11344     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11345         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
11346           Intrinsic::ppc_is_decremented_ctr_nonzero &&
11347         isa<ConstantSDNode>(RHS)) {
11348       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
11349              "Counter decrement comparison is not EQ or NE");
11350 
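      // The intrinsic returns 1 when the decremented counter is non-zero, so
      // branch with bdnz when the comparison is satisfied by that result
      // (EQ against a non-zero constant or NE against zero); otherwise use bdz.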
11351       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
11352       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
11353                     (CC == ISD::SETNE && !Val);
11354 
11355       // We now need to make the intrinsic dead (it cannot be instruction
11356       // selected).
11357       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
11358       assert(LHS.getNode()->hasOneUse() &&
11359              "Counter decrement has more than one use");
11360 
11361       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
11362                          N->getOperand(0), N->getOperand(4));
11363     }
11364 
11365     int CompareOpc;
11366     bool isDot;
11367 
11368     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
11369         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
11370         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
11371       assert(isDot && "Can't compare against a vector result!");
11372 
11373       // If this is a comparison against something other than 0/1, then we know
11374       // that the condition is never/always true.
11375       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
11376       if (Val != 0 && Val != 1) {
11377         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
11378           return N->getOperand(0);
11379         // Always !=, turn it into an unconditional branch.
11380         return DAG.getNode(ISD::BR, dl, MVT::Other,
11381                            N->getOperand(0), N->getOperand(4));
11382       }
11383 
11384       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
11385 
11386       // Create the PPCISD altivec 'dot' comparison node.
11387       SDValue Ops[] = {
11388         LHS.getOperand(2),  // LHS of compare
11389         LHS.getOperand(3),  // RHS of compare
11390         DAG.getConstant(CompareOpc, dl, MVT::i32)
11391       };
11392       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
11393       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
11394 
11395       // Unpack the result based on how the target uses it.
11396       PPC::Predicate CompOpc;
11397       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
11398       default:  // Can't happen, don't crash on invalid number though.
11399       case 0:   // Branch on the value of the EQ bit of CR6.
11400         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
11401         break;
11402       case 1:   // Branch on the inverted value of the EQ bit of CR6.
11403         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
11404         break;
11405       case 2:   // Branch on the value of the LT bit of CR6.
11406         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
11407         break;
11408       case 3:   // Branch on the inverted value of the LT bit of CR6.
11409         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
11410         break;
11411       }
11412 
11413       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
11414                          DAG.getConstant(CompOpc, dl, MVT::i32),
11415                          DAG.getRegister(PPC::CR6, MVT::i32),
11416                          N->getOperand(4), CompNode.getValue(1));
11417     }
11418     break;
11419   }
11420   case ISD::BUILD_VECTOR:
11421     return DAGCombineBuildVector(N, DCI);
11422   }
11423 
11424   return SDValue();
11425 }
11426 
11427 SDValue
11428 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
11429                                   SelectionDAG &DAG,
11430                                   std::vector<SDNode *> *Created) const {
11431   // fold (sdiv X, pow2)
11432   EVT VT = N->getValueType(0);
11433   if (VT == MVT::i64 && !Subtarget.isPPC64())
11434     return SDValue();
11435   if ((VT != MVT::i32 && VT != MVT::i64) ||
11436       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
11437     return SDValue();
11438 
11439   SDLoc DL(N);
11440   SDValue N0 = N->getOperand(0);
11441 
11442   bool IsNegPow2 = (-Divisor).isPowerOf2();
11443   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
11444   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
11445 
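  // PPCISD::SRA_ADDZE expands to sra[wd]i followed by addze: the shift sets
  // the carry when one-bits are shifted out of a negative dividend, and addze
  // adds that carry back so the quotient is rounded toward zero.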
11446   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
11447   if (Created)
11448     Created->push_back(Op.getNode());
11449 
11450   if (IsNegPow2) {
11451     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
11452     if (Created)
11453       Created->push_back(Op.getNode());
11454   }
11455 
11456   return Op;
11457 }
11458 
11459 //===----------------------------------------------------------------------===//
11460 // Inline Assembly Support
11461 //===----------------------------------------------------------------------===//
11462 
11463 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
11464                                                       APInt &KnownZero,
11465                                                       APInt &KnownOne,
11466                                                       const SelectionDAG &DAG,
11467                                                       unsigned Depth) const {
11468   KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
11469   switch (Op.getOpcode()) {
11470   default: break;
11471   case PPCISD::LBRX: {
11472     // lhbrx is known to have the top bits cleared out.
11473     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
11474       KnownZero = 0xFFFF0000;
11475     break;
11476   }
11477   case ISD::INTRINSIC_WO_CHAIN: {
11478     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
11479     default: break;
11480     case Intrinsic::ppc_altivec_vcmpbfp_p:
11481     case Intrinsic::ppc_altivec_vcmpeqfp_p:
11482     case Intrinsic::ppc_altivec_vcmpequb_p:
11483     case Intrinsic::ppc_altivec_vcmpequh_p:
11484     case Intrinsic::ppc_altivec_vcmpequw_p:
11485     case Intrinsic::ppc_altivec_vcmpequd_p:
11486     case Intrinsic::ppc_altivec_vcmpgefp_p:
11487     case Intrinsic::ppc_altivec_vcmpgtfp_p:
11488     case Intrinsic::ppc_altivec_vcmpgtsb_p:
11489     case Intrinsic::ppc_altivec_vcmpgtsh_p:
11490     case Intrinsic::ppc_altivec_vcmpgtsw_p:
11491     case Intrinsic::ppc_altivec_vcmpgtsd_p:
11492     case Intrinsic::ppc_altivec_vcmpgtub_p:
11493     case Intrinsic::ppc_altivec_vcmpgtuh_p:
11494     case Intrinsic::ppc_altivec_vcmpgtuw_p:
11495     case Intrinsic::ppc_altivec_vcmpgtud_p:
11496       KnownZero = ~1U;  // All bits but the low one are known to be zero.
11497       break;
11498     }
11499   }
11500   }
11501 }
11502 
11503 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
11504   switch (Subtarget.getDarwinDirective()) {
11505   default: break;
11506   case PPC::DIR_970:
11507   case PPC::DIR_PWR4:
11508   case PPC::DIR_PWR5:
11509   case PPC::DIR_PWR5X:
11510   case PPC::DIR_PWR6:
11511   case PPC::DIR_PWR6X:
11512   case PPC::DIR_PWR7:
11513   case PPC::DIR_PWR8:
11514   case PPC::DIR_PWR9: {
11515     if (!ML)
11516       break;
11517 
11518     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11519 
11520     // For small loops (between 5 and 8 instructions), align to a 32-byte
11521     // boundary so that the entire loop fits in one instruction-cache line.
11522     uint64_t LoopSize = 0;
11523     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
11524       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
11525         LoopSize += TII->getInstSizeInBytes(*J);
11526         if (LoopSize > 32)
11527           break;
11528       }
11529 
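    // The value returned below is a log2 alignment, so 5 requests a 32-byte
    // boundary.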
11530     if (LoopSize > 16 && LoopSize <= 32)
11531       return 5;
11532 
11533     break;
11534   }
11535   }
11536 
11537   return TargetLowering::getPrefLoopAlignment(ML);
11538 }
11539 
11540 /// getConstraintType - Given a constraint, return the type of
11541 /// constraint it is for this target.
11542 PPCTargetLowering::ConstraintType
11543 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
11544   if (Constraint.size() == 1) {
11545     switch (Constraint[0]) {
11546     default: break;
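    // Register-class constraints: 'b' is a base register (any GPR except r0),
    // 'r' any GPR, 'f'/'d' the floating-point registers, 'v' the Altivec
    // vector registers, and 'y' the condition registers.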
11547     case 'b':
11548     case 'r':
11549     case 'f':
11550     case 'd':
11551     case 'v':
11552     case 'y':
11553       return C_RegisterClass;
11554     case 'Z':
11555       // FIXME: While Z does indicate a memory constraint, it specifically
11556       // indicates an r+r address (used in conjunction with the 'y' modifier
11557       // in the replacement string). Currently, we're forcing the base
11558       // register to be r0 in the asm printer (which is interpreted as zero)
11559       // and forming the complete address in the second register. This is
11560       // suboptimal.
11561       return C_Memory;
11562     }
11563   } else if (Constraint == "wc") { // individual CR bits.
11564     return C_RegisterClass;
11565   } else if (Constraint == "wa" || Constraint == "wd" ||
11566              Constraint == "wf" || Constraint == "ws") {
11567     return C_RegisterClass; // VSX registers.
11568   }
11569   return TargetLowering::getConstraintType(Constraint);
11570 }
11571 
11572 /// Examine constraint type and operand type and determine a weight value.
11573 /// This object must already have been set up with the operand type
11574 /// and the current alternative constraint selected.
11575 TargetLowering::ConstraintWeight
11576 PPCTargetLowering::getSingleConstraintMatchWeight(
11577     AsmOperandInfo &info, const char *constraint) const {
11578   ConstraintWeight weight = CW_Invalid;
11579   Value *CallOperandVal = info.CallOperandVal;
11580   // If we don't have a value, we can't do a match,
11581   // but allow it at the lowest weight.
11582   if (!CallOperandVal)
11583     return CW_Default;
11584   Type *type = CallOperandVal->getType();
11585 
11586   // Look at the constraint type.
11587   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
11588     return CW_Register; // an individual CR bit.
11589   else if ((StringRef(constraint) == "wa" ||
11590             StringRef(constraint) == "wd" ||
11591             StringRef(constraint) == "wf") &&
11592            type->isVectorTy())
11593     return CW_Register;
11594   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
11595     return CW_Register;
11596 
11597   switch (*constraint) {
11598   default:
11599     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
11600     break;
11601   case 'b':
11602     if (type->isIntegerTy())
11603       weight = CW_Register;
11604     break;
11605   case 'f':
11606     if (type->isFloatTy())
11607       weight = CW_Register;
11608     break;
11609   case 'd':
11610     if (type->isDoubleTy())
11611       weight = CW_Register;
11612     break;
11613   case 'v':
11614     if (type->isVectorTy())
11615       weight = CW_Register;
11616     break;
11617   case 'y':
11618     weight = CW_Register;
11619     break;
11620   case 'Z':
11621     weight = CW_Memory;
11622     break;
11623   }
11624   return weight;
11625 }
11626 
11627 std::pair<unsigned, const TargetRegisterClass *>
11628 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
11629                                                 StringRef Constraint,
11630                                                 MVT VT) const {
11631   if (Constraint.size() == 1) {
11632     // GCC RS6000 Constraint Letters
11633     switch (Constraint[0]) {
11634     case 'b':   // R1-R31
11635       if (VT == MVT::i64 && Subtarget.isPPC64())
11636         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
11637       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
11638     case 'r':   // R0-R31
11639       if (VT == MVT::i64 && Subtarget.isPPC64())
11640         return std::make_pair(0U, &PPC::G8RCRegClass);
11641       return std::make_pair(0U, &PPC::GPRCRegClass);
11642     // 'd' and 'f' constraints are both defined to be "the floating point
11643     // registers", where one is for 32-bit and the other for 64-bit. We don't
11644     // need to distinguish them here, so give both the same register classes.
11645     case 'd':
11646     case 'f':
11647       if (VT == MVT::f32 || VT == MVT::i32)
11648         return std::make_pair(0U, &PPC::F4RCRegClass);
11649       if (VT == MVT::f64 || VT == MVT::i64)
11650         return std::make_pair(0U, &PPC::F8RCRegClass);
11651       if (VT == MVT::v4f64 && Subtarget.hasQPX())
11652         return std::make_pair(0U, &PPC::QFRCRegClass);
11653       if (VT == MVT::v4f32 && Subtarget.hasQPX())
11654         return std::make_pair(0U, &PPC::QSRCRegClass);
11655       break;
11656     case 'v':
11657       if (VT == MVT::v4f64 && Subtarget.hasQPX())
11658         return std::make_pair(0U, &PPC::QFRCRegClass);
11659       if (VT == MVT::v4f32 && Subtarget.hasQPX())
11660         return std::make_pair(0U, &PPC::QSRCRegClass);
11661       if (Subtarget.hasAltivec())
11662         return std::make_pair(0U, &PPC::VRRCRegClass);
      break;   // Without Altivec, don't fall through to the 'y' (CR) constraint.
11663     case 'y':   // crrc
11664       return std::make_pair(0U, &PPC::CRRCRegClass);
11665     }
11666   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
11667     // An individual CR bit.
11668     return std::make_pair(0U, &PPC::CRBITRCRegClass);
11669   } else if ((Constraint == "wa" || Constraint == "wd" ||
11670              Constraint == "wf") && Subtarget.hasVSX()) {
11671     return std::make_pair(0U, &PPC::VSRCRegClass);
11672   } else if (Constraint == "ws" && Subtarget.hasVSX()) {
11673     if (VT == MVT::f32 && Subtarget.hasP8Vector())
11674       return std::make_pair(0U, &PPC::VSSRCRegClass);
11675     else
11676       return std::make_pair(0U, &PPC::VSFRCRegClass);
11677   }
11678 
11679   std::pair<unsigned, const TargetRegisterClass *> R =
11680       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
11681 
11682   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
11683   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
11684   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
11685   // register.
11686   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
11687   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
11688   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
11689       PPC::GPRCRegClass.contains(R.first))
11690     return std::make_pair(TRI->getMatchingSuperReg(R.first,
11691                             PPC::sub_32, &PPC::G8RCRegClass),
11692                           &PPC::G8RCRegClass);
11693 
11694   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
11695   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
11696     R.first = PPC::CR0;
11697     R.second = &PPC::CRRCRegClass;
11698   }
11699 
11700   return R;
11701 }
11702 
11703 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
11704 /// vector.  If it is invalid, don't add anything to Ops.
11705 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
11706                                                      std::string &Constraint,
11707                                                      std::vector<SDValue>&Ops,
11708                                                      SelectionDAG &DAG) const {
11709   SDValue Result;
11710 
11711   // Only support length 1 constraints.
11712   if (Constraint.length() > 1) return;
11713 
11714   char Letter = Constraint[0];
11715   switch (Letter) {
11716   default: break;
11717   case 'I':
11718   case 'J':
11719   case 'K':
11720   case 'L':
11721   case 'M':
11722   case 'N':
11723   case 'O':
11724   case 'P': {
11725     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
11726     if (!CST) return; // Must be an immediate to match.
11727     SDLoc dl(Op);
11728     int64_t Value = CST->getSExtValue();
11729     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
11730                          // numbers are printed as such.
11731     switch (Letter) {
11732     default: llvm_unreachable("Unknown constraint letter!");
11733     case 'I':  // "I" is a signed 16-bit constant.
11734       if (isInt<16>(Value))
11735         Result = DAG.getTargetConstant(Value, dl, TCVT);
11736       break;
11737     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
11738       if (isShiftedUInt<16, 16>(Value))
11739         Result = DAG.getTargetConstant(Value, dl, TCVT);
11740       break;
11741     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
11742       if (isShiftedInt<16, 16>(Value))
11743         Result = DAG.getTargetConstant(Value, dl, TCVT);
11744       break;
11745     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
11746       if (isUInt<16>(Value))
11747         Result = DAG.getTargetConstant(Value, dl, TCVT);
11748       break;
11749     case 'M':  // "M" is a constant that is greater than 31.
11750       if (Value > 31)
11751         Result = DAG.getTargetConstant(Value, dl, TCVT);
11752       break;
11753     case 'N':  // "N" is a positive constant that is an exact power of two.
11754       if (Value > 0 && isPowerOf2_64(Value))
11755         Result = DAG.getTargetConstant(Value, dl, TCVT);
11756       break;
11757     case 'O':  // "O" is the constant zero.
11758       if (Value == 0)
11759         Result = DAG.getTargetConstant(Value, dl, TCVT);
11760       break;
11761     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
11762       if (isInt<16>(-Value))
11763         Result = DAG.getTargetConstant(Value, dl, TCVT);
11764       break;
11765     }
11766     break;
11767   }
11768   }
11769 
11770   if (Result.getNode()) {
11771     Ops.push_back(Result);
11772     return;
11773   }
11774 
11775   // Handle standard constraint letters.
11776   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
11777 }
11778 
11779 // isLegalAddressingMode - Return true if the addressing mode represented
11780 // by AM is legal for this target, for a load/store of the specified type.
11781 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
11782                                               const AddrMode &AM, Type *Ty,
11783                                               unsigned AS) const {
11784   // PPC does not allow r+i addressing modes for vectors!
11785   if (Ty->isVectorTy() && AM.BaseOffs != 0)
11786     return false;
11787 
11788   // PPC allows a sign-extended 16-bit immediate field.
11789   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
11790     return false;
11791 
11792   // No global is ever allowed as a base.
11793   if (AM.BaseGV)
11794     return false;
11795 
11796   // PPC only supports r+r and r+i addressing modes.
11797   switch (AM.Scale) {
11798   case 0:  // "r+i" or just "i", depending on HasBaseReg.
11799     break;
11800   case 1:
11801     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
11802       return false;
11803     // Otherwise we have r+r or r+i.
11804     break;
11805   case 2:
11806     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
11807       return false;
11808     // Allow 2*r as r+r.
11809     break;
11810   default:
11811     // No other scales are supported.
11812     return false;
11813   }
11814 
11815   return true;
11816 }
11817 
11818 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
11819                                            SelectionDAG &DAG) const {
11820   MachineFunction &MF = DAG.getMachineFunction();
11821   MachineFrameInfo &MFI = MF.getFrameInfo();
11822   MFI.setReturnAddressIsTaken(true);
11823 
11824   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
11825     return SDValue();
11826 
11827   SDLoc dl(Op);
11828   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
11829 
11830   // Make sure the function does not optimize away the store of the RA to
11831   // the stack.
11832   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
11833   FuncInfo->setLRStoreRequired();
11834   bool isPPC64 = Subtarget.isPPC64();
11835   auto PtrVT = getPointerTy(MF.getDataLayout());
11836 
11837   if (Depth > 0) {
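    // For non-zero depths, find the requested frame via LowerFRAMEADDR and
    // load the saved LR from the return-address save offset in that frame.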
11838     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
11839     SDValue Offset =
11840         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
11841                         isPPC64 ? MVT::i64 : MVT::i32);
11842     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
11843                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
11844                        MachinePointerInfo());
11845   }
11846 
11847   // Just load the return address off the stack.
11848   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
11849   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
11850                      MachinePointerInfo());
11851 }
11852 
11853 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
11854                                           SelectionDAG &DAG) const {
11855   SDLoc dl(Op);
11856   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
11857 
11858   MachineFunction &MF = DAG.getMachineFunction();
11859   MachineFrameInfo &MFI = MF.getFrameInfo();
11860   MFI.setFrameAddressIsTaken(true);
11861 
11862   EVT PtrVT = getPointerTy(MF.getDataLayout());
11863   bool isPPC64 = PtrVT == MVT::i64;
11864 
11865   // Naked functions never have a frame pointer, and so we use r1. For all
11866   // other functions, this decision must be deferred until PEI.
11867   unsigned FrameReg;
11868   if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
11869     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
11870   else
11871     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
11872 
11873   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
11874                                          PtrVT);
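  // Each frame's back chain word (at offset 0) holds the caller's frame
  // address, so walking Depth frames is Depth chained loads.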
11875   while (Depth--)
11876     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
11877                             FrameAddr, MachinePointerInfo());
11878   return FrameAddr;
11879 }
11880 
11881 // FIXME? Maybe this could be a TableGen attribute on some registers and
11882 // this table could be generated automatically from RegInfo.
11883 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
11884                                               SelectionDAG &DAG) const {
11885   bool isPPC64 = Subtarget.isPPC64();
11886   bool isDarwinABI = Subtarget.isDarwinABI();
11887 
11888   if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
11889       (!isPPC64 && VT != MVT::i32))
11890     report_fatal_error("Invalid register global variable type");
11891 
11892   bool is64Bit = isPPC64 && VT == MVT::i64;
11893   unsigned Reg = StringSwitch<unsigned>(RegName)
11894                    .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
11895                    .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
11896                    .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
11897                                   (is64Bit ? PPC::X13 : PPC::R13))
11898                    .Default(0);
11899 
11900   if (Reg)
11901     return Reg;
11902   report_fatal_error("Invalid register name global variable");
11903 }
11904 
11905 bool
11906 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
11907   // The PowerPC target isn't yet aware of offsets.
11908   return false;
11909 }
11910 
11911 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
11912                                            const CallInst &I,
11913                                            unsigned Intrinsic) const {
11914 
11915   switch (Intrinsic) {
11916   case Intrinsic::ppc_qpx_qvlfd:
11917   case Intrinsic::ppc_qpx_qvlfs:
11918   case Intrinsic::ppc_qpx_qvlfcd:
11919   case Intrinsic::ppc_qpx_qvlfcs:
11920   case Intrinsic::ppc_qpx_qvlfiwa:
11921   case Intrinsic::ppc_qpx_qvlfiwz:
11922   case Intrinsic::ppc_altivec_lvx:
11923   case Intrinsic::ppc_altivec_lvxl:
11924   case Intrinsic::ppc_altivec_lvebx:
11925   case Intrinsic::ppc_altivec_lvehx:
11926   case Intrinsic::ppc_altivec_lvewx:
11927   case Intrinsic::ppc_vsx_lxvd2x:
11928   case Intrinsic::ppc_vsx_lxvw4x: {
11929     EVT VT;
11930     switch (Intrinsic) {
11931     case Intrinsic::ppc_altivec_lvebx:
11932       VT = MVT::i8;
11933       break;
11934     case Intrinsic::ppc_altivec_lvehx:
11935       VT = MVT::i16;
11936       break;
11937     case Intrinsic::ppc_altivec_lvewx:
11938       VT = MVT::i32;
11939       break;
11940     case Intrinsic::ppc_vsx_lxvd2x:
11941       VT = MVT::v2f64;
11942       break;
11943     case Intrinsic::ppc_qpx_qvlfd:
11944       VT = MVT::v4f64;
11945       break;
11946     case Intrinsic::ppc_qpx_qvlfs:
11947       VT = MVT::v4f32;
11948       break;
11949     case Intrinsic::ppc_qpx_qvlfcd:
11950       VT = MVT::v2f64;
11951       break;
11952     case Intrinsic::ppc_qpx_qvlfcs:
11953       VT = MVT::v2f32;
11954       break;
11955     default:
11956       VT = MVT::v4i32;
11957       break;
11958     }
11959 
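    // Several of these loads (the lvx and QPX forms) implicitly align the
    // address downward, so memory starting up to size-1 bytes before the
    // given pointer may be accessed; the offset/size below describe that
    // conservative window for all of the intrinsics handled here.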
11960     Info.opc = ISD::INTRINSIC_W_CHAIN;
11961     Info.memVT = VT;
11962     Info.ptrVal = I.getArgOperand(0);
11963     Info.offset = -VT.getStoreSize()+1;
11964     Info.size = 2*VT.getStoreSize()-1;
11965     Info.align = 1;
11966     Info.vol = false;
11967     Info.readMem = true;
11968     Info.writeMem = false;
11969     return true;
11970   }
11971   case Intrinsic::ppc_qpx_qvlfda:
11972   case Intrinsic::ppc_qpx_qvlfsa:
11973   case Intrinsic::ppc_qpx_qvlfcda:
11974   case Intrinsic::ppc_qpx_qvlfcsa:
11975   case Intrinsic::ppc_qpx_qvlfiwaa:
11976   case Intrinsic::ppc_qpx_qvlfiwza: {
11977     EVT VT;
11978     switch (Intrinsic) {
11979     case Intrinsic::ppc_qpx_qvlfda:
11980       VT = MVT::v4f64;
11981       break;
11982     case Intrinsic::ppc_qpx_qvlfsa:
11983       VT = MVT::v4f32;
11984       break;
11985     case Intrinsic::ppc_qpx_qvlfcda:
11986       VT = MVT::v2f64;
11987       break;
11988     case Intrinsic::ppc_qpx_qvlfcsa:
11989       VT = MVT::v2f32;
11990       break;
11991     default:
11992       VT = MVT::v4i32;
11993       break;
11994     }
11995 
11996     Info.opc = ISD::INTRINSIC_W_CHAIN;
11997     Info.memVT = VT;
11998     Info.ptrVal = I.getArgOperand(0);
11999     Info.offset = 0;
12000     Info.size = VT.getStoreSize();
12001     Info.align = 1;
12002     Info.vol = false;
12003     Info.readMem = true;
12004     Info.writeMem = false;
12005     return true;
12006   }
12007   case Intrinsic::ppc_qpx_qvstfd:
12008   case Intrinsic::ppc_qpx_qvstfs:
12009   case Intrinsic::ppc_qpx_qvstfcd:
12010   case Intrinsic::ppc_qpx_qvstfcs:
12011   case Intrinsic::ppc_qpx_qvstfiw:
12012   case Intrinsic::ppc_altivec_stvx:
12013   case Intrinsic::ppc_altivec_stvxl:
12014   case Intrinsic::ppc_altivec_stvebx:
12015   case Intrinsic::ppc_altivec_stvehx:
12016   case Intrinsic::ppc_altivec_stvewx:
12017   case Intrinsic::ppc_vsx_stxvd2x:
12018   case Intrinsic::ppc_vsx_stxvw4x: {
12019     EVT VT;
12020     switch (Intrinsic) {
12021     case Intrinsic::ppc_altivec_stvebx:
12022       VT = MVT::i8;
12023       break;
12024     case Intrinsic::ppc_altivec_stvehx:
12025       VT = MVT::i16;
12026       break;
12027     case Intrinsic::ppc_altivec_stvewx:
12028       VT = MVT::i32;
12029       break;
12030     case Intrinsic::ppc_vsx_stxvd2x:
12031       VT = MVT::v2f64;
12032       break;
12033     case Intrinsic::ppc_qpx_qvstfd:
12034       VT = MVT::v4f64;
12035       break;
12036     case Intrinsic::ppc_qpx_qvstfs:
12037       VT = MVT::v4f32;
12038       break;
12039     case Intrinsic::ppc_qpx_qvstfcd:
12040       VT = MVT::v2f64;
12041       break;
12042     case Intrinsic::ppc_qpx_qvstfcs:
12043       VT = MVT::v2f32;
12044       break;
12045     default:
12046       VT = MVT::v4i32;
12047       break;
12048     }
12049 
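    // As with the loads above, the offset/size describe a conservative window
    // covering the implicit downward alignment done by the aligned store forms.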
12050     Info.opc = ISD::INTRINSIC_VOID;
12051     Info.memVT = VT;
12052     Info.ptrVal = I.getArgOperand(1);
12053     Info.offset = -VT.getStoreSize()+1;
12054     Info.size = 2*VT.getStoreSize()-1;
12055     Info.align = 1;
12056     Info.vol = false;
12057     Info.readMem = false;
12058     Info.writeMem = true;
12059     return true;
12060   }
12061   case Intrinsic::ppc_qpx_qvstfda:
12062   case Intrinsic::ppc_qpx_qvstfsa:
12063   case Intrinsic::ppc_qpx_qvstfcda:
12064   case Intrinsic::ppc_qpx_qvstfcsa:
12065   case Intrinsic::ppc_qpx_qvstfiwa: {
12066     EVT VT;
12067     switch (Intrinsic) {
12068     case Intrinsic::ppc_qpx_qvstfda:
12069       VT = MVT::v4f64;
12070       break;
12071     case Intrinsic::ppc_qpx_qvstfsa:
12072       VT = MVT::v4f32;
12073       break;
12074     case Intrinsic::ppc_qpx_qvstfcda:
12075       VT = MVT::v2f64;
12076       break;
12077     case Intrinsic::ppc_qpx_qvstfcsa:
12078       VT = MVT::v2f32;
12079       break;
12080     default:
12081       VT = MVT::v4i32;
12082       break;
12083     }
12084 
12085     Info.opc = ISD::INTRINSIC_VOID;
12086     Info.memVT = VT;
12087     Info.ptrVal = I.getArgOperand(1);
12088     Info.offset = 0;
12089     Info.size = VT.getStoreSize();
12090     Info.align = 1;
12091     Info.vol = false;
12092     Info.readMem = false;
12093     Info.writeMem = true;
12094     return true;
12095   }
12096   default:
12097     break;
12098   }
12099 
12100   return false;
12101 }
12102 
12103 /// getOptimalMemOpType - Returns the target specific optimal type for load
12104 /// and store operations as a result of memset, memcpy, and memmove
12105 /// lowering. If DstAlign is zero, that means the destination alignment can
12106 /// satisfy any constraint. Similarly, if SrcAlign is zero, there is no need
12107 /// to check it against an alignment requirement,
12108 /// probably because the source does not need to be loaded. If 'IsMemset' is
12109 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
12110 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
12111 /// source is constant so it does not need to be loaded.
12112 /// It returns EVT::Other if the type should be determined using generic
12113 /// target-independent logic.
12114 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
12115                                            unsigned DstAlign, unsigned SrcAlign,
12116                                            bool IsMemset, bool ZeroMemset,
12117                                            bool MemcpyStrSrc,
12118                                            MachineFunction &MF) const {
12119   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
12120     const Function *F = MF.getFunction();
12121     // When expanding a memset, require at least two QPX instructions to cover
12122     // the cost of loading the value to be stored from the constant pool.
12123     if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
12124        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
12125         !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
12126       return MVT::v4f64;
12127     }
12128 
12129     // We should use Altivec/VSX loads and stores when available. For unaligned
12130     // addresses, unaligned VSX loads are only fast starting with the P8.
12131     if (Subtarget.hasAltivec() && Size >= 16 &&
12132         (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
12133          ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
12134       return MVT::v4i32;
12135   }
12136 
12137   if (Subtarget.isPPC64()) {
12138     return MVT::i64;
12139   }
12140 
12141   return MVT::i32;
12142 }
12143 
12144 /// \brief Returns true if it is beneficial to convert a load of a constant
12145 /// to just the constant itself.
12146 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
12147                                                           Type *Ty) const {
12148   assert(Ty->isIntegerTy());
12149 
12150   unsigned BitSize = Ty->getPrimitiveSizeInBits();
12151   return !(BitSize == 0 || BitSize > 64);
12152 }
12153 
12154 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
12155   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
12156     return false;
12157   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
12158   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
12159   return NumBits1 == 64 && NumBits2 == 32;
12160 }
12161 
12162 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
12163   if (!VT1.isInteger() || !VT2.isInteger())
12164     return false;
12165   unsigned NumBits1 = VT1.getSizeInBits();
12166   unsigned NumBits2 = VT2.getSizeInBits();
12167   return NumBits1 == 64 && NumBits2 == 32;
12168 }
12169 
12170 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
12171   // Generally speaking, zexts are not free, but they are free when they can be
12172   // folded with other operations.
12173   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
12174     EVT MemVT = LD->getMemoryVT();
12175     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
12176          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
12177         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
12178          LD->getExtensionType() == ISD::ZEXTLOAD))
12179       return true;
12180   }
12181 
12182   // FIXME: Add other cases...
12183   //  - 32-bit shifts with a zext to i64
12184   //  - zext after ctlz, bswap, etc.
12185   //  - zext after and by a constant mask
12186 
12187   return TargetLowering::isZExtFree(Val, VT2);
12188 }
12189 
12190 bool PPCTargetLowering::isFPExtFree(EVT VT) const {
12191   assert(VT.isFloatingPoint());
12192   return true;
12193 }
12194 
12195 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
12196   return isInt<16>(Imm) || isUInt<16>(Imm);
12197 }
12198 
12199 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
12200   return isInt<16>(Imm) || isUInt<16>(Imm);
12201 }
12202 
12203 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
12204                                                        unsigned,
12205                                                        unsigned,
12206                                                        bool *Fast) const {
12207   if (DisablePPCUnaligned)
12208     return false;
12209 
12210   // PowerPC supports unaligned memory access for simple non-vector types.
12211   // Although accessing unaligned addresses is not as efficient as accessing
12212   // aligned addresses, it is generally more efficient than manual expansion,
12213   // and such accesses generally only trap (requiring software emulation)
12214   // when crossing page boundaries.
12215 
12216   if (!VT.isSimple())
12217     return false;
12218 
12219   if (VT.getSimpleVT().isVector()) {
12220     if (Subtarget.hasVSX()) {
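      // Only the vector types with a direct lxvd2x/stxvd2x or lxvw4x/stxvw4x
      // mapping are handled here.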
12221       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
12222           VT != MVT::v4f32 && VT != MVT::v4i32)
12223         return false;
12224     } else {
12225       return false;
12226     }
12227   }
12228 
12229   if (VT == MVT::ppcf128)
12230     return false;
12231 
12232   if (Fast)
12233     *Fast = true;
12234 
12235   return true;
12236 }
12237 
12238 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
12239   VT = VT.getScalarType();
12240 
12241   if (!VT.isSimple())
12242     return false;
12243 
12244   switch (VT.getSimpleVT().SimpleTy) {
12245   case MVT::f32:
12246   case MVT::f64:
12247     return true;
12248   default:
12249     break;
12250   }
12251 
12252   return false;
12253 }
12254 
12255 const MCPhysReg *
12256 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
12257   // LR is a callee-save register, but we must treat it as clobbered by any call
12258   // site. Hence we include LR in the scratch registers, which are in turn added
12259   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
12260   // to CTR, which is used by any indirect call.
12261   static const MCPhysReg ScratchRegs[] = {
12262     PPC::X12, PPC::LR8, PPC::CTR8, 0
12263   };
12264 
12265   return ScratchRegs;
12266 }
12267 
12268 unsigned PPCTargetLowering::getExceptionPointerRegister(
12269     const Constant *PersonalityFn) const {
12270   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
12271 }
12272 
12273 unsigned PPCTargetLowering::getExceptionSelectorRegister(
12274     const Constant *PersonalityFn) const {
12275   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
12276 }
12277 
12278 bool
12279 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
12280                      EVT VT , unsigned DefinedValues) const {
12281   if (VT == MVT::v2i64)
12282     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
12283 
12284   if (Subtarget.hasVSX() || Subtarget.hasQPX())
12285     return true;
12286 
12287   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
12288 }
12289 
12290 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
12291   if (DisableILPPref || Subtarget.enableMachineScheduler())
12292     return TargetLowering::getSchedulingPreference(N);
12293 
12294   return Sched::ILP;
12295 }
12296 
12297 // Create a fast isel object.
12298 FastISel *
12299 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
12300                                   const TargetLibraryInfo *LibInfo) const {
12301   return PPC::createFastISel(FuncInfo, LibInfo);
12302 }
12303 
12304 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
12305   if (Subtarget.isDarwinABI()) return;
12306   if (!Subtarget.isPPC64()) return;
12307 
12308   // Update IsSplitCSR in PPCFunctionInfo
12309   PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
12310   PFI->setIsSplitCSR(true);
12311 }
12312 
12313 void PPCTargetLowering::insertCopiesSplitCSR(
12314   MachineBasicBlock *Entry,
12315   const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
12316   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12317   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
12318   if (!IStart)
12319     return;
12320 
12321   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12322   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
12323   MachineBasicBlock::iterator MBBI = Entry->begin();
12324   for (const MCPhysReg *I = IStart; *I; ++I) {
12325     const TargetRegisterClass *RC = nullptr;
12326     if (PPC::G8RCRegClass.contains(*I))
12327       RC = &PPC::G8RCRegClass;
12328     else if (PPC::F8RCRegClass.contains(*I))
12329       RC = &PPC::F8RCRegClass;
12330     else if (PPC::CRRCRegClass.contains(*I))
12331       RC = &PPC::CRRCRegClass;
12332     else if (PPC::VRRCRegClass.contains(*I))
12333       RC = &PPC::VRRCRegClass;
12334     else
12335       llvm_unreachable("Unexpected register class in CSRsViaCopy!");
12336 
12337     unsigned NewVR = MRI->createVirtualRegister(RC);
12338     // Create copy from CSR to a virtual register.
12339     // FIXME: this currently does not emit CFI pseudo-instructions, it works
12340     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
12341     // nounwind. If we want to generalize this later, we may need to emit
12342     // CFI pseudo-instructions.
12343     assert(Entry->getParent()->getFunction()->hasFnAttribute(
12344              Attribute::NoUnwind) &&
12345            "Function should be nounwind in insertCopiesSplitCSR!");
12346     Entry->addLiveIn(*I);
12347     BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
12348       .addReg(*I);
12349 
12350     // Insert the copy-back instructions right before the terminator
12351     for (auto *Exit : Exits)
12352       BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
12353               TII->get(TargetOpcode::COPY), *I)
12354         .addReg(NewVR);
12355   }
12356 }
12357 
12358 // Override to enable LOAD_STACK_GUARD lowering on Linux.
12359 bool PPCTargetLowering::useLoadStackGuardNode() const {
12360   if (!Subtarget.isTargetLinux())
12361     return TargetLowering::useLoadStackGuardNode();
12362   return true;
12363 }
12364 
12365 // Override to skip declaring the stack-protector guard variable on Linux,
// where the guard is instead loaded via LOAD_STACK_GUARD (see above).
12366 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
12367   if (!Subtarget.isTargetLinux())
12368     return TargetLowering::insertSSPDeclarations(M);
12369 }
12370