1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PPCISelLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPCISelLowering.h"
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPC.h"
17 #include "PPCCCState.h"
18 #include "PPCCallingConv.h"
19 #include "PPCFrameLowering.h"
20 #include "PPCInstrInfo.h"
21 #include "PPCMachineFunctionInfo.h"
22 #include "PPCPerfectShuffle.h"
23 #include "PPCRegisterInfo.h"
24 #include "PPCSubtarget.h"
25 #include "PPCTargetMachine.h"
26 #include "llvm/ADT/APFloat.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/None.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/SmallPtrSet.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/ADT/StringSwitch.h"
38 #include "llvm/CodeGen/CallingConvLower.h"
39 #include "llvm/CodeGen/ISDOpcodes.h"
40 #include "llvm/CodeGen/MachineBasicBlock.h"
41 #include "llvm/CodeGen/MachineFrameInfo.h"
42 #include "llvm/CodeGen/MachineFunction.h"
43 #include "llvm/CodeGen/MachineInstr.h"
44 #include "llvm/CodeGen/MachineInstrBuilder.h"
45 #include "llvm/CodeGen/MachineJumpTableInfo.h"
46 #include "llvm/CodeGen/MachineLoopInfo.h"
47 #include "llvm/CodeGen/MachineMemOperand.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/MachineValueType.h"
51 #include "llvm/CodeGen/RuntimeLibcalls.h"
52 #include "llvm/CodeGen/SelectionDAG.h"
53 #include "llvm/CodeGen/SelectionDAGNodes.h"
54 #include "llvm/CodeGen/ValueTypes.h"
55 #include "llvm/IR/CallSite.h"
56 #include "llvm/IR/CallingConv.h"
57 #include "llvm/IR/Constant.h"
58 #include "llvm/IR/Constants.h"
59 #include "llvm/IR/DataLayout.h"
60 #include "llvm/IR/DebugLoc.h"
61 #include "llvm/IR/DerivedTypes.h"
62 #include "llvm/IR/Function.h"
63 #include "llvm/IR/GlobalValue.h"
64 #include "llvm/IR/IRBuilder.h"
65 #include "llvm/IR/Instructions.h"
66 #include "llvm/IR/Intrinsics.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/Type.h"
69 #include "llvm/IR/Use.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/MC/MCExpr.h"
72 #include "llvm/MC/MCRegisterInfo.h"
73 #include "llvm/Support/AtomicOrdering.h"
74 #include "llvm/Support/BranchProbability.h"
75 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/CodeGen.h"
77 #include "llvm/Support/CommandLine.h"
78 #include "llvm/Support/Compiler.h"
79 #include "llvm/Support/Debug.h"
80 #include "llvm/Support/ErrorHandling.h"
81 #include "llvm/Support/Format.h"
82 #include "llvm/Support/KnownBits.h"
83 #include "llvm/Support/MathExtras.h"
84 #include "llvm/Support/raw_ostream.h"
85 #include "llvm/Target/TargetInstrInfo.h"
86 #include "llvm/Target/TargetLowering.h"
87 #include "llvm/Target/TargetMachine.h"
88 #include "llvm/Target/TargetOptions.h"
89 #include "llvm/Target/TargetRegisterInfo.h"
90 #include <algorithm>
91 #include <cassert>
92 #include <cstdint>
93 #include <iterator>
94 #include <list>
95 #include <utility>
96 #include <vector>
97 
98 using namespace llvm;
99 
100 #define DEBUG_TYPE "ppc-lowering"
101 
102 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
103 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
104 
105 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
106 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
107 
108 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
109 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
110 
111 static cl::opt<bool> DisableSCO("disable-ppc-sco",
112 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
113 
114 STATISTIC(NumTailCalls, "Number of tail calls");
115 STATISTIC(NumSiblingCalls, "Number of sibling calls");
116 
117 // FIXME: Remove this once the bug has been fixed!
118 extern cl::opt<bool> ANDIGlueBug;
119 
120 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
121                                      const PPCSubtarget &STI)
122     : TargetLowering(TM), Subtarget(STI) {
123   // Use _setjmp/_longjmp instead of setjmp/longjmp.
124   setUseUnderscoreSetJmp(true);
125   setUseUnderscoreLongJmp(true);
126 
127   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
128   // arguments are at least 4/8 bytes aligned.
129   bool isPPC64 = Subtarget.isPPC64();
130   setMinStackArgumentAlignment(isPPC64 ? 8:4);
131 
132   // Set up the register classes.
133   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
134   if (!useSoftFloat()) {
135     addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
136     addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
137   }
138 
139   // Match BITREVERSE to customized fast code sequence in the td file.
140   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
141 
142   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
143   for (MVT VT : MVT::integer_valuetypes()) {
144     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
145     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
146   }
147 
148   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
149 
150   // PowerPC has pre-inc load and store's.
151   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
152   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
153   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
154   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
155   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
156   setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
157   setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
158   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
159   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
160   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
161   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
162   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
163   setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
164   setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
165 
166   if (Subtarget.useCRBits()) {
167     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
168 
169     if (isPPC64 || Subtarget.hasFPCVT()) {
170       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
171       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
172                          isPPC64 ? MVT::i64 : MVT::i32);
173       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
174       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
175                         isPPC64 ? MVT::i64 : MVT::i32);
176     } else {
177       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
178       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
179     }
180 
181     // PowerPC does not support direct load/store of condition registers.
182     setOperationAction(ISD::LOAD, MVT::i1, Custom);
183     setOperationAction(ISD::STORE, MVT::i1, Custom);
184 
185     // FIXME: Remove this once the ANDI glue bug is fixed:
186     if (ANDIGlueBug)
187       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
188 
189     for (MVT VT : MVT::integer_valuetypes()) {
190       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
191       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
192       setTruncStoreAction(VT, MVT::i1, Expand);
193     }
194 
195     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
196   }
197 
198   // This is used in the ppcf128->int sequence.  Note it has different semantics
199   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
200   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
201 
202   // We do not currently implement these libm ops for PowerPC.
203   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
204   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
205   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
206   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
207   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
208   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
209 
210   // PowerPC has no SREM/UREM instructions unless we are on P9
211   // On P9 we may use a hardware instruction to compute the remainder.
212   // The instructions are not legalized directly because in the cases where the
213   // result of both the remainder and the division is required it is more
214   // efficient to compute the remainder from the result of the division rather
215   // than use the remainder instruction.
216   if (Subtarget.isISA3_0()) {
217     setOperationAction(ISD::SREM, MVT::i32, Custom);
218     setOperationAction(ISD::UREM, MVT::i32, Custom);
219     setOperationAction(ISD::SREM, MVT::i64, Custom);
220     setOperationAction(ISD::UREM, MVT::i64, Custom);
221   } else {
222     setOperationAction(ISD::SREM, MVT::i32, Expand);
223     setOperationAction(ISD::UREM, MVT::i32, Expand);
224     setOperationAction(ISD::SREM, MVT::i64, Expand);
225     setOperationAction(ISD::UREM, MVT::i64, Expand);
226   }
227 
228   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
229   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
230   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
231   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
232   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
233   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
234   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
235   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
236   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
237 
238   // We don't support sin/cos/sqrt/fmod/pow
239   setOperationAction(ISD::FSIN , MVT::f64, Expand);
240   setOperationAction(ISD::FCOS , MVT::f64, Expand);
241   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
242   setOperationAction(ISD::FREM , MVT::f64, Expand);
243   setOperationAction(ISD::FPOW , MVT::f64, Expand);
244   setOperationAction(ISD::FMA  , MVT::f64, Legal);
245   setOperationAction(ISD::FSIN , MVT::f32, Expand);
246   setOperationAction(ISD::FCOS , MVT::f32, Expand);
247   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
248   setOperationAction(ISD::FREM , MVT::f32, Expand);
249   setOperationAction(ISD::FPOW , MVT::f32, Expand);
250   setOperationAction(ISD::FMA  , MVT::f32, Legal);
251 
252   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
253 
254   // If we're enabling GP optimizations, use hardware square root
255   if (!Subtarget.hasFSQRT() &&
256       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
257         Subtarget.hasFRE()))
258     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
259 
260   if (!Subtarget.hasFSQRT() &&
261       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
262         Subtarget.hasFRES()))
263     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
264 
265   if (Subtarget.hasFCPSGN()) {
266     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
267     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
268   } else {
269     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
270     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
271   }
272 
273   if (Subtarget.hasFPRND()) {
274     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
275     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
276     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
277     setOperationAction(ISD::FROUND, MVT::f64, Legal);
278 
279     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
280     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
281     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
282     setOperationAction(ISD::FROUND, MVT::f32, Legal);
283   }
284 
  // PowerPC does not have BSWAP.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
287   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
288   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
289   if (Subtarget.isISA3_0()) {
290     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
291     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
292   } else {
293     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
294     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
295   }
296 
297   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
298     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
299     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
300   } else {
301     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
302     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
303   }
304 
305   // PowerPC does not have ROTR
306   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
307   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
308 
309   if (!Subtarget.useCRBits()) {
310     // PowerPC does not have Select
311     setOperationAction(ISD::SELECT, MVT::i32, Expand);
312     setOperationAction(ISD::SELECT, MVT::i64, Expand);
313     setOperationAction(ISD::SELECT, MVT::f32, Expand);
314     setOperationAction(ISD::SELECT, MVT::f64, Expand);
315   }
316 
317   // PowerPC wants to turn select_cc of FP into fsel when possible.
318   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
319   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
320 
321   // PowerPC wants to optimize integer setcc a bit
322   if (!Subtarget.useCRBits())
323     setOperationAction(ISD::SETCC, MVT::i32, Custom);
324 
325   // PowerPC does not have BRCOND which requires SetCC
326   if (!Subtarget.useCRBits())
327     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
328 
329   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
330 
331   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
332   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
333 
334   // PowerPC does not have [U|S]INT_TO_FP
335   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
336   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
337 
338   if (Subtarget.hasDirectMove() && isPPC64) {
339     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
340     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
341     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
342     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
343   } else {
344     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
345     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
346     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
347     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
348   }
349 
350   // We cannot sextinreg(i1).  Expand to shifts.
351   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
352 
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc.  As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
359   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
360   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
361 
362   // We want to legalize GlobalAddress and ConstantPool nodes into the
363   // appropriate instructions to materialize the address.
364   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
365   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
366   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
367   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
368   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
369   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
370   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
371   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
372   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
373   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
374 
375   // TRAP is legal.
376   setOperationAction(ISD::TRAP, MVT::Other, Legal);
377 
378   // TRAMPOLINE is custom lowered.
379   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
380   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
381 
382   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
383   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
384 
385   if (Subtarget.isSVR4ABI()) {
386     if (isPPC64) {
387       // VAARG always uses double-word chunks, so promote anything smaller.
388       setOperationAction(ISD::VAARG, MVT::i1, Promote);
389       AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
390       setOperationAction(ISD::VAARG, MVT::i8, Promote);
391       AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
392       setOperationAction(ISD::VAARG, MVT::i16, Promote);
393       AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
394       setOperationAction(ISD::VAARG, MVT::i32, Promote);
395       AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
396       setOperationAction(ISD::VAARG, MVT::Other, Expand);
397     } else {
398       // VAARG is custom lowered with the 32-bit SVR4 ABI.
399       setOperationAction(ISD::VAARG, MVT::Other, Custom);
400       setOperationAction(ISD::VAARG, MVT::i64, Custom);
401     }
402   } else
403     setOperationAction(ISD::VAARG, MVT::Other, Expand);
404 
405   if (Subtarget.isSVR4ABI() && !isPPC64)
406     // VACOPY is custom lowered with the 32-bit SVR4 ABI.
407     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
408   else
409     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
410 
411   // Use the default implementation.
412   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
413   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
414   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
415   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
416   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
417   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
418   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
419   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
420   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
421 
422   // We want to custom lower some of our intrinsics.
423   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
424 
425   // To handle counter-based loop conditions.
426   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
427 
428   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
429   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
430   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
431   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
432 
433   // Comparisons that require checking two conditions.
434   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
435   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
436   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
437   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
438   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
439   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
440   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
441   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
442   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
443   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
444   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
445   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
446 
447   if (Subtarget.has64BitSupport()) {
448     // They also have instructions for converting between i64 and fp.
449     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
450     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
451     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
452     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
453     // This is just the low 32 bits of a (signed) fp->i64 conversion.
454     // We cannot do this with Promote because i64 is not a legal type.
455     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
456 
457     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
458       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
459   } else {
460     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
461     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
462   }
463 
464   // With the instructions enabled under FPCVT, we can do everything.
465   if (Subtarget.hasFPCVT()) {
466     if (Subtarget.has64BitSupport()) {
467       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
468       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
469       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
470       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
471     }
472 
473     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
474     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
475     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
476     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
477   }
478 
479   if (Subtarget.use64BitRegs()) {
480     // 64-bit PowerPC implementations can support i64 types directly
481     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
482     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
483     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
484     // 64-bit PowerPC wants to expand i128 shifts itself.
485     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
486     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
487     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
488   } else {
489     // 32-bit PowerPC wants to expand i64 shifts itself.
490     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
491     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
492     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
493   }
494 
495   if (Subtarget.hasAltivec()) {
496     // First set operation action for all vector types to expand. Then we
497     // will selectively turn on ones that can be effectively codegen'd.
498     for (MVT VT : MVT::vector_valuetypes()) {
499       // add/sub are legal for all supported vector VT's.
500       setOperationAction(ISD::ADD, VT, Legal);
501       setOperationAction(ISD::SUB, VT, Legal);
502 
503       // Vector instructions introduced in P8
504       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
505         setOperationAction(ISD::CTPOP, VT, Legal);
506         setOperationAction(ISD::CTLZ, VT, Legal);
507       }
508       else {
509         setOperationAction(ISD::CTPOP, VT, Expand);
510         setOperationAction(ISD::CTLZ, VT, Expand);
511       }
512 
513       // Vector instructions introduced in P9
514       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
515         setOperationAction(ISD::CTTZ, VT, Legal);
516       else
517         setOperationAction(ISD::CTTZ, VT, Expand);
518 
519       // We promote all shuffles to v16i8.
520       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
521       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
522 
523       // We promote all non-typed operations to v4i32.
524       setOperationAction(ISD::AND   , VT, Promote);
525       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
526       setOperationAction(ISD::OR    , VT, Promote);
527       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
528       setOperationAction(ISD::XOR   , VT, Promote);
529       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
530       setOperationAction(ISD::LOAD  , VT, Promote);
531       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
532       setOperationAction(ISD::SELECT, VT, Promote);
533       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
534       setOperationAction(ISD::SELECT_CC, VT, Promote);
535       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
536       setOperationAction(ISD::STORE, VT, Promote);
537       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
538 
539       // No other operations are legal.
540       setOperationAction(ISD::MUL , VT, Expand);
541       setOperationAction(ISD::SDIV, VT, Expand);
542       setOperationAction(ISD::SREM, VT, Expand);
543       setOperationAction(ISD::UDIV, VT, Expand);
544       setOperationAction(ISD::UREM, VT, Expand);
545       setOperationAction(ISD::FDIV, VT, Expand);
546       setOperationAction(ISD::FREM, VT, Expand);
547       setOperationAction(ISD::FNEG, VT, Expand);
548       setOperationAction(ISD::FSQRT, VT, Expand);
549       setOperationAction(ISD::FLOG, VT, Expand);
550       setOperationAction(ISD::FLOG10, VT, Expand);
551       setOperationAction(ISD::FLOG2, VT, Expand);
552       setOperationAction(ISD::FEXP, VT, Expand);
553       setOperationAction(ISD::FEXP2, VT, Expand);
554       setOperationAction(ISD::FSIN, VT, Expand);
555       setOperationAction(ISD::FCOS, VT, Expand);
556       setOperationAction(ISD::FABS, VT, Expand);
557       setOperationAction(ISD::FFLOOR, VT, Expand);
558       setOperationAction(ISD::FCEIL,  VT, Expand);
559       setOperationAction(ISD::FTRUNC, VT, Expand);
560       setOperationAction(ISD::FRINT,  VT, Expand);
561       setOperationAction(ISD::FNEARBYINT, VT, Expand);
562       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
563       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
564       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
565       setOperationAction(ISD::MULHU, VT, Expand);
566       setOperationAction(ISD::MULHS, VT, Expand);
567       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
568       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
569       setOperationAction(ISD::UDIVREM, VT, Expand);
570       setOperationAction(ISD::SDIVREM, VT, Expand);
571       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
572       setOperationAction(ISD::FPOW, VT, Expand);
573       setOperationAction(ISD::BSWAP, VT, Expand);
574       setOperationAction(ISD::VSELECT, VT, Expand);
575       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
576       setOperationAction(ISD::ROTL, VT, Expand);
577       setOperationAction(ISD::ROTR, VT, Expand);
578 
579       for (MVT InnerVT : MVT::vector_valuetypes()) {
580         setTruncStoreAction(VT, InnerVT, Expand);
581         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
582         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
583         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
584       }
585     }
586 
587     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
588     // with merges, splats, etc.
589     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
590 
591     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
592     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
593     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
594     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
595     setOperationAction(ISD::SELECT, MVT::v4i32,
596                        Subtarget.useCRBits() ? Legal : Expand);
597     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
598     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
599     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
600     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
601     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
602     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
603     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
604     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
605     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
606 
607     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
608     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
609     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
610     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
611 
612     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
613     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
614 
615     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
616       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
617       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
618     }
619 
620     if (Subtarget.hasP8Altivec())
621       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
622     else
623       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
624 
625     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
626     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
627 
628     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
629     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
630 
631     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
632     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
633     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
634     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
635 
636     // Altivec does not contain unordered floating-point compare instructions
637     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
638     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
639     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
640     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
641 
642     if (Subtarget.hasVSX()) {
643       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
644       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
645       if (Subtarget.hasP8Vector()) {
646         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
647         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
648       }
649       if (Subtarget.hasDirectMove() && isPPC64) {
650         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
651         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
652         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
653         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
654         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
655         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
656         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
657         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
658       }
659       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
660 
661       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
662       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
663       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
664       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
665       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
666 
667       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
668 
669       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
670       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
671 
672       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
673       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
674 
675       setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
676       setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
677       setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
678       setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
679       setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
680 
681       // Share the Altivec comparison restrictions.
682       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
683       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
684       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
685       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
686 
687       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
688       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
689 
690       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
691 
692       if (Subtarget.hasP8Vector())
693         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
694 
695       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
696 
697       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
698       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
699       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
700 
701       if (Subtarget.hasP8Altivec()) {
702         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
703         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
704         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
705 
706         // 128 bit shifts can be accomplished via 3 instructions for SHL and
707         // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.
710         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
711         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
712         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
713 
714         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
715       }
716       else {
717         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
718         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
719         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
720 
721         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
722 
723         // VSX v2i64 only supports non-arithmetic operations.
724         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
725         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
726       }
727 
728       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
729       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
730       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
731       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
732 
733       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
734 
735       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
736       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
737       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
738       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
739 
740       // Vector operation legalization checks the result type of
741       // SIGN_EXTEND_INREG, overall legalization checks the inner type.
742       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
743       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
744       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
745       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
746 
747       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
748       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
749       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
750       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
751 
752       if (Subtarget.hasDirectMove())
753         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
754       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
755 
756       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
757     }
758 
759     if (Subtarget.hasP8Altivec()) {
760       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
761       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
762     }
763 
764     if (Subtarget.hasP9Vector()) {
765       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
766       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
767 
768       // 128 bit shifts can be accomplished via 3 instructions for SHL and
769       // SRL, but not for SRA because of the instructions available:
770       // VS{RL} and VS{RL}O.
771       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
772       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
773       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
774     }
775   }
776 
777   if (Subtarget.hasQPX()) {
778     setOperationAction(ISD::FADD, MVT::v4f64, Legal);
779     setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
780     setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
781     setOperationAction(ISD::FREM, MVT::v4f64, Expand);
782 
783     setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
784     setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
785 
786     setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
787     setOperationAction(ISD::STORE , MVT::v4f64, Custom);
788 
789     setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
790     setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
791 
792     if (!Subtarget.useCRBits())
793       setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
794     setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
795 
796     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
797     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
798     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
799     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
800     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
801     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
802     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
803 
804     setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
805     setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
806 
807     setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
808     setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
809     setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
810 
811     setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
812     setOperationAction(ISD::FABS , MVT::v4f64, Legal);
813     setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
814     setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
815     setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
816     setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
817     setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
818     setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
819     setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
820     setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
821 
822     setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
823     setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
824 
825     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
826     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
827 
828     addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
829 
830     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
831     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
832     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
833     setOperationAction(ISD::FREM, MVT::v4f32, Expand);
834 
835     setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
836     setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
837 
838     setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
839     setOperationAction(ISD::STORE , MVT::v4f32, Custom);
840 
841     if (!Subtarget.useCRBits())
842       setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
843     setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
844 
845     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
846     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
847     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
848     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
849     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
850     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
851     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
852 
853     setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
854     setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
855 
856     setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
857     setOperationAction(ISD::FABS , MVT::v4f32, Legal);
858     setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
859     setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
860     setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
861     setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
862     setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
863     setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
864     setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
865     setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
866 
867     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
868     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
869 
870     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
871     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
872 
873     addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
874 
875     setOperationAction(ISD::AND , MVT::v4i1, Legal);
876     setOperationAction(ISD::OR , MVT::v4i1, Legal);
877     setOperationAction(ISD::XOR , MVT::v4i1, Legal);
878 
879     if (!Subtarget.useCRBits())
880       setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
881     setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
882 
883     setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
884     setOperationAction(ISD::STORE , MVT::v4i1, Custom);
885 
886     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
887     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
888     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
889     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
890     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
891     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
892     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
893 
894     setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
895     setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
896 
897     addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
898 
899     setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
900     setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
901     setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
902     setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
903 
904     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
905     setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
906     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
907     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
908 
909     setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
910     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
911 
912     // These need to set FE_INEXACT, and so cannot be vectorized here.
913     setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
914     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
915 
916     if (TM.Options.UnsafeFPMath) {
917       setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
918       setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
919 
920       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
921       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
922     } else {
923       setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
924       setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
925 
926       setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
927       setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
928     }
929   }
930 
931   if (Subtarget.has64BitSupport())
932     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
933 
934   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
935 
936   if (!isPPC64) {
937     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
938     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
939   }
940 
941   setBooleanContents(ZeroOrOneBooleanContent);
942 
943   if (Subtarget.hasAltivec()) {
944     // Altivec instructions set fields to all zeros or all ones.
945     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
946   }
947 
948   if (!isPPC64) {
949     // These libcalls are not available in 32-bit.
950     setLibcallName(RTLIB::SHL_I128, nullptr);
951     setLibcallName(RTLIB::SRL_I128, nullptr);
952     setLibcallName(RTLIB::SRA_I128, nullptr);
953   }
954 
955   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
956 
957   // We have target-specific dag combine patterns for the following nodes:
958   setTargetDAGCombine(ISD::SHL);
959   setTargetDAGCombine(ISD::SRA);
960   setTargetDAGCombine(ISD::SRL);
961   setTargetDAGCombine(ISD::SINT_TO_FP);
962   setTargetDAGCombine(ISD::BUILD_VECTOR);
963   if (Subtarget.hasFPCVT())
964     setTargetDAGCombine(ISD::UINT_TO_FP);
965   setTargetDAGCombine(ISD::LOAD);
966   setTargetDAGCombine(ISD::STORE);
967   setTargetDAGCombine(ISD::BR_CC);
968   if (Subtarget.useCRBits())
969     setTargetDAGCombine(ISD::BRCOND);
970   setTargetDAGCombine(ISD::BSWAP);
971   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
972   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
973   setTargetDAGCombine(ISD::INTRINSIC_VOID);
974 
975   setTargetDAGCombine(ISD::SIGN_EXTEND);
976   setTargetDAGCombine(ISD::ZERO_EXTEND);
977   setTargetDAGCombine(ISD::ANY_EXTEND);
978 
979   if (Subtarget.useCRBits()) {
980     setTargetDAGCombine(ISD::TRUNCATE);
981     setTargetDAGCombine(ISD::SETCC);
982     setTargetDAGCombine(ISD::SELECT_CC);
983   }
984 
985   // Use reciprocal estimates.
986   if (TM.Options.UnsafeFPMath) {
987     setTargetDAGCombine(ISD::FDIV);
988     setTargetDAGCombine(ISD::FSQRT);
989   }
990 
991   // Darwin long double math library functions have $LDBL128 appended.
992   if (Subtarget.isDarwin()) {
993     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
994     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
995     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
996     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
997     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
998     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
999     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1000     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1001     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1002     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1003   }
1004 
1005   // With 32 condition bits, we don't need to sink (and duplicate) compares
1006   // aggressively in CodeGenPrep.
1007   if (Subtarget.useCRBits()) {
1008     setHasMultipleConditionRegisters();
1009     setJumpIsExpensive();
1010   }
1011 
1012   setMinFunctionAlignment(2);
1013   if (Subtarget.isDarwin())
1014     setPrefFunctionAlignment(4);
1015 
1016   switch (Subtarget.getDarwinDirective()) {
1017   default: break;
1018   case PPC::DIR_970:
1019   case PPC::DIR_A2:
1020   case PPC::DIR_E500mc:
1021   case PPC::DIR_E5500:
1022   case PPC::DIR_PWR4:
1023   case PPC::DIR_PWR5:
1024   case PPC::DIR_PWR5X:
1025   case PPC::DIR_PWR6:
1026   case PPC::DIR_PWR6X:
1027   case PPC::DIR_PWR7:
1028   case PPC::DIR_PWR8:
1029   case PPC::DIR_PWR9:
1030     setPrefFunctionAlignment(4);
1031     setPrefLoopAlignment(4);
1032     break;
1033   }
1034 
1035   if (Subtarget.enableMachineScheduler())
1036     setSchedulingPreference(Sched::Source);
1037   else
1038     setSchedulingPreference(Sched::Hybrid);
1039 
1040   computeRegisterProperties(STI.getRegisterInfo());
1041 
1042   // The Freescale cores do better with aggressive inlining of memcpy and
1043   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1044   if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1045       Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1046     MaxStoresPerMemset = 32;
1047     MaxStoresPerMemsetOptSize = 16;
1048     MaxStoresPerMemcpy = 32;
1049     MaxStoresPerMemcpyOptSize = 8;
1050     MaxStoresPerMemmove = 32;
1051     MaxStoresPerMemmoveOptSize = 8;
1052   } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
1053     // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
1055     // over one hundred cycles.
1056     MaxStoresPerMemset = 128;
1057     MaxStoresPerMemcpy = 128;
1058     MaxStoresPerMemmove = 128;
1059     MaxLoadsPerMemcmp = 128;
1060   } else {
1061     MaxLoadsPerMemcmp = 8;
1062     MaxLoadsPerMemcmpOptSize = 4;
1063   }
1064 }
1065 
1066 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1067 /// the desired ByVal argument alignment.
1068 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1069                              unsigned MaxMaxAlign) {
1070   if (MaxAlign == MaxMaxAlign)
1071     return;
1072   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1073     if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1074       MaxAlign = 32;
1075     else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1076       MaxAlign = 16;
1077   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1078     unsigned EltAlign = 0;
1079     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1080     if (EltAlign > MaxAlign)
1081       MaxAlign = EltAlign;
1082   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1083     for (auto *EltTy : STy->elements()) {
1084       unsigned EltAlign = 0;
1085       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1086       if (EltAlign > MaxAlign)
1087         MaxAlign = EltAlign;
1088       if (MaxAlign == MaxMaxAlign)
1089         break;
1090     }
1091   }
1092 }
1093 
1094 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1095 /// function arguments in the caller parameter area.
1096 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1097                                                   const DataLayout &DL) const {
1098   // Darwin passes everything on 4 byte boundary.
1099   if (Subtarget.isDarwin())
1100     return 4;
1101 
1102   // 16byte and wider vectors are passed on 16byte boundary.
1103   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1104   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1105   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1106     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1107   return Align;
1108 }
1109 
// Report whether this function should be lowered with software floating
// point, forwarding the subtarget's soft-float feature setting.
bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}
1113 
// Map a PPC-specific SelectionDAG node opcode (PPCISD::NodeType) to its
// printable name for DAG debug dumps. Opcodes with no entry here —
// including FIRST_NUMBER and any generic ISD opcode — yield nullptr.
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
  case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
  case PPCISD::STXSIX:          return "PPCISD::STXSIX";
  case PPCISD::VEXTS:           return "PPCISD::VEXTS";
  case PPCISD::SExtVElems:      return "PPCISD::SExtVElems";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  // No name registered for this opcode.
  return nullptr;
}
1216 
1217 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1218                                           EVT VT) const {
1219   if (!VT.isVector())
1220     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1221 
1222   if (Subtarget.hasQPX())
1223     return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1224 
1225   return VT.changeVectorElementTypeToInteger();
1226 }
1227 
// Allow aggressive formation of fused multiply-add operations for every
// floating-point type. Callers must only query floating-point VTs.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
1232 
1233 //===----------------------------------------------------------------------===//
1234 // Node matching predicates, for use by the tblgen matching code.
1235 //===----------------------------------------------------------------------===//
1236 
1237 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1238 static bool isFloatingPointZero(SDValue Op) {
1239   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1240     return CFP->getValueAPF().isZero();
1241   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1242     // Maybe this has already been legalized into the constant pool?
1243     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1244       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1245         return CFP->getValueAPF().isZero();
1246   }
1247   return false;
1248 }
1249 
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef shuffle-mask elements are encoded as negative values.
  if (Op < 0)
    return true;
  return Op == Val;
}
1255 
1256 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1257 /// VPKUHUM instruction.
1258 /// The ShuffleKind distinguishes between big-endian operations with
1259 /// two different inputs (0), either-endian operations with two identical
1260 /// inputs (1), and little-endian operations with two different inputs (2).
1261 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1262 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1263                                SelectionDAG &DAG) {
1264   bool IsLE = DAG.getDataLayout().isLittleEndian();
1265   if (ShuffleKind == 0) {
1266     if (IsLE)
1267       return false;
1268     for (unsigned i = 0; i != 16; ++i)
1269       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1270         return false;
1271   } else if (ShuffleKind == 2) {
1272     if (!IsLE)
1273       return false;
1274     for (unsigned i = 0; i != 16; ++i)
1275       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1276         return false;
1277   } else if (ShuffleKind == 1) {
1278     unsigned j = IsLE ? 0 : 1;
1279     for (unsigned i = 0; i != 8; ++i)
1280       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1281           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1282         return false;
1283   }
1284   return true;
1285 }
1286 
1287 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1288 /// VPKUWUM instruction.
1289 /// The ShuffleKind distinguishes between big-endian operations with
1290 /// two different inputs (0), either-endian operations with two identical
1291 /// inputs (1), and little-endian operations with two different inputs (2).
1292 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Big-endian, two distinct inputs: the halfword at result bytes
    // (i, i+1) must pull bytes (2i+2, 2i+3) of the concatenated inputs.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    // Little-endian, two (swapped) inputs: the halfword at (i, i+1) must
    // pull bytes (2i, 2i+1).
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary (identical inputs): both result halves must agree, with byte
    // offset j (0 on LE, 2 on BE) selecting which half of each word.
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
        return false;
  }
  // Other ShuffleKind values impose no constraints and match trivially.
  return true;
}
1321 
1322 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1323 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1324 /// current subtarget.
1325 ///
1326 /// The ShuffleKind distinguishes between big-endian operations with
1327 /// two different inputs (0), either-endian operations with two identical
1328 /// inputs (1), and little-endian operations with two different inputs (2).
1329 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  // VPKUDUM only exists on subtargets with the POWER8 vector facility.
  const PPCSubtarget& Subtarget =
    static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Big-endian, two distinct inputs: the word at result bytes (i..i+3)
    // must pull bytes (2i+4 .. 2i+7) of the concatenated inputs.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    // Little-endian, two (swapped) inputs: the word at (i..i+3) must pull
    // bytes (2i .. 2i+3).
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary (identical inputs): both result halves must agree, with byte
    // offset j (0 on LE, 4 on BE) selecting which word of each doubleword.
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  // Other ShuffleKind values impose no constraints and match trivially.
  return true;
}
1371 
1372 /// isVMerge - Common function, used to match vmrg* shuffles.
1373 ///
1374 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1375                      unsigned LHSStart, unsigned RHSStart) {
1376   if (N->getValueType(0) != MVT::v16i8)
1377     return false;
1378   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1379          "Unsupported merge size!");
1380 
1381   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1382     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1383       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1384                              LHSStart+j+i*UnitSize) ||
1385           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1386                              RHSStart+j+i*UnitSize))
1387         return false;
1388     }
1389   return true;
1390 }
1391 
1392 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1393 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1394 /// The ShuffleKind distinguishes between big-endian merges with two
1395 /// different inputs (0), either-endian merges with two identical inputs (1),
1396 /// and little-endian merges with two different inputs (2).  For the latter,
1397 /// the input operands are swapped (see PPCInstrAltivec.td).
1398 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1399                              unsigned ShuffleKind, SelectionDAG &DAG) {
1400   if (DAG.getDataLayout().isLittleEndian()) {
1401     if (ShuffleKind == 1) // unary
1402       return isVMerge(N, UnitSize, 0, 0);
1403     else if (ShuffleKind == 2) // swapped
1404       return isVMerge(N, UnitSize, 0, 16);
1405     else
1406       return false;
1407   } else {
1408     if (ShuffleKind == 1) // unary
1409       return isVMerge(N, UnitSize, 8, 8);
1410     else if (ShuffleKind == 0) // normal
1411       return isVMerge(N, UnitSize, 8, 24);
1412     else
1413       return false;
1414   }
1415 }
1416 
1417 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1418 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1419 /// The ShuffleKind distinguishes between big-endian merges with two
1420 /// different inputs (0), either-endian merges with two identical inputs (1),
1421 /// and little-endian merges with two different inputs (2).  For the latter,
1422 /// the input operands are swapped (see PPCInstrAltivec.td).
1423 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1424                              unsigned ShuffleKind, SelectionDAG &DAG) {
1425   if (DAG.getDataLayout().isLittleEndian()) {
1426     if (ShuffleKind == 1) // unary
1427       return isVMerge(N, UnitSize, 8, 8);
1428     else if (ShuffleKind == 2) // swapped
1429       return isVMerge(N, UnitSize, 8, 24);
1430     else
1431       return false;
1432   } else {
1433     if (ShuffleKind == 1) // unary
1434       return isVMerge(N, UnitSize, 0, 0);
1435     else if (ShuffleKind == 0) // normal
1436       return isVMerge(N, UnitSize, 0, 16);
1437     else
1438       return false;
1439   }
1440 }
1441 
1442 /**
1443  * \brief Common function used to match vmrgew and vmrgow shuffles
1444  *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
1448  *   - Little Endian:
1449  *     - Use offset of 0 to check for odd elements
1450  *     - Use offset of 4 to check for even elements
1451  *   - Big Endian:
1452  *     - Use offset of 0 to check for even elements
1453  *     - Use offset of 4 to check for odd elements
1454  * A detailed description of the vector element ordering for little endian and
1455  * big endian can be found at
1456  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1457  * Targeting your applications - what little endian and big endian IBM XL C/C++
1458  * compiler differences mean to you
1459  *
1460  * The mask to the shuffle vector instruction specifies the indices of the
1461  * elements from the two input vectors to place in the result. The elements are
1462  * numbered in array-access order, starting with the first vector. These vectors
1463  * are always of type v16i8, thus each vector will contain 16 elements of size
1464  * 8. More info on the shuffle vector can be found in the
1465  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1466  * Language Reference.
1467  *
1468  * The RHSStartValue indicates whether the same input vectors are used (unary)
1469  * or two different input vectors are used, based on the following:
1470  *   - If the instruction uses the same vector for both inputs, the range of the
1471  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1472  *     be 0.
1473  *   - If the instruction has two different vectors then the range of the
1474  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1475  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1476  *     to 31 specify elements in the second vector).
1477  *
1478  * \param[in] N The shuffle vector SD Node to analyze
1479  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1480  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1481  * vector to the shuffle_vector instruction
1482  * \return true iff this shuffle vector represents an even or odd word merge
1483  */
1484 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1485                      unsigned RHSStartValue) {
1486   if (N->getValueType(0) != MVT::v16i8)
1487     return false;
1488 
1489   for (unsigned i = 0; i < 2; ++i)
1490     for (unsigned j = 0; j < 4; ++j)
1491       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1492                              i*RHSStartValue+j+IndexOffset) ||
1493           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1494                              i*RHSStartValue+j+IndexOffset+8))
1495         return false;
1496   return true;
1497 }
1498 
1499 /**
1500  * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
1501  * vmrgow instructions.
1502  *
1503  * \param[in] N The shuffle vector SD Node to analyze
1504  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1505  * \param[in] ShuffleKind Identify the type of merge:
1506  *   - 0 = big-endian merge with two different inputs;
1507  *   - 1 = either-endian merge with two identical inputs;
1508  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1509  *     little-endian merges).
1510  * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask represents an even or odd word merge
1512  */
1513 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1514                               unsigned ShuffleKind, SelectionDAG &DAG) {
1515   if (DAG.getDataLayout().isLittleEndian()) {
1516     unsigned indexOffset = CheckEven ? 4 : 0;
1517     if (ShuffleKind == 1) // Unary
1518       return isVMerge(N, indexOffset, 0);
1519     else if (ShuffleKind == 2) // swapped
1520       return isVMerge(N, indexOffset, 16);
1521     else
1522       return false;
1523   }
1524   else {
1525     unsigned indexOffset = CheckEven ? 0 : 4;
1526     if (ShuffleKind == 1) // Unary
1527       return isVMerge(N, indexOffset, 0);
1528     else if (ShuffleKind == 0) // Normal
1529       return isVMerge(N, indexOffset, 16);
1530     else
1531       return false;
1532   }
1533   return false;
1534 }
1535 
1536 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1537 /// amount, otherwise return -1.
1538 /// The ShuffleKind distinguishes between big-endian operations with two
1539 /// different inputs (0), either-endian operations with two identical inputs
1540 /// (1), and little-endian operations with two different inputs (2).  For the
1541 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  // Result byte i would have to read source byte ShiftAmt, so the first
  // defined mask entry can never be smaller than its own position.
  if (ShiftAmt < i) return -1;

  // Normalize to the shift amount implied for mask position 0.
  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Two distinct inputs: indices continue straight into the second vector.
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Unary (both inputs identical): indices wrap around modulo 16.
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // On little-endian targets the operands were swapped (see the header
  // comment), so the effective shift amount is mirrored.
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
1582 
1583 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1584 /// specifies a splat of a single element that is suitable for input to
1585 /// VSPLTB/VSPLTH/VSPLTW.
1586 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1587   assert(N->getValueType(0) == MVT::v16i8 &&
1588          (EltSize == 1 || EltSize == 2 || EltSize == 4));
1589 
1590   // The consecutive indices need to specify an element, not part of two
1591   // different elements.  So abandon ship early if this isn't the case.
1592   if (N->getMaskElt(0) % EltSize != 0)
1593     return false;
1594 
1595   // This is a splat operation if each element of the permute is the same, and
1596   // if the value doesn't reference the second vector.
1597   unsigned ElementBase = N->getMaskElt(0);
1598 
1599   // FIXME: Handle UNDEF elements too!
1600   if (ElementBase >= 16)
1601     return false;
1602 
1603   // Check that the indices are consecutive, in the case of a multi-byte element
1604   // splatted with a v16i8 mask.
1605   for (unsigned i = 1; i != EltSize; ++i)
1606     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1607       return false;
1608 
1609   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1610     if (N->getMaskElt(i) < 0) continue;
1611     for (unsigned j = 0; j != EltSize; ++j)
1612       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1613         return false;
1614   }
1615   return true;
1616 }
1617 
1618 /// Check that the mask is shuffling N byte elements. Within each N byte
1619 /// element of the mask, the indices could be either in increasing or
1620 /// decreasing order as long as they are consecutive.
1621 /// \param[in] N the shuffle vector SD Node to analyze
1622 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1623 /// Word/DoubleWord/QuadWord).
1624 /// \param[in] StepLen the delta indices number among the N byte element, if
1625 /// the mask is in increasing/decreasing order then it is 1/-1.
1626 /// \return true iff the mask is shuffling N byte elements.
1627 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1628                                    int StepLen) {
1629   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1630          "Unexpected element width.");
1631   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1632 
1633   unsigned NumOfElem = 16 / Width;
1634   unsigned MaskVal[16]; //  Width is never greater than 16
1635   for (unsigned i = 0; i < NumOfElem; ++i) {
1636     MaskVal[0] = N->getMaskElt(i * Width);
1637     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1638       return false;
1639     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1640       return false;
1641     }
1642 
1643     for (unsigned int j = 1; j < Width; ++j) {
1644       MaskVal[j] = N->getMaskElt(i * Width + j);
1645       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1646         return false;
1647       }
1648     }
1649   }
1650 
1651   return true;
1652 }
1653 
/// Return true if this shuffle mask can be implemented as a word insertion
/// (XXINSERTW pattern): three words of the result come unchanged from one
/// input and the remaining word comes from either input.  On success,
/// \p ShiftElts is set to the word rotation needed to line up the inserted
/// word, \p InsertAtByte to the byte offset of the insertion, and \p Swap
/// to true when the shuffle's inputs must be exchanged.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // Every word of the result must be a whole (byte-consecutive) source word.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  // Per-endianness rotation amounts, indexed by the source word number
  // (modulo 4) of the inserted element.
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    // In the unary case only this one source word can be inserted with no
    // rotation.
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
1728 
/// Return true if this shuffle mask selects whole consecutive words across
/// the concatenated inputs, as matched for XXSLDWI.  On success \p ShiftElts
/// is set to the word shift amount and \p Swap to whether the two inputs
/// must be exchanged first.
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    // Words must wrap around within the single (4-word) input.
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  // (Two inputs: indices wrap around within the 8-word concatenation.)
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  // M0 is in [0,7], so exactly one branch of each if/else below fires and
  // Swap/ShiftElts are always assigned before returning.
  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
1790 
1791 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1792   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1793 
1794   if (!isNByteElemShuffleMask(N, Width, -1))
1795     return false;
1796 
1797   for (int i = 0; i < 16; i += Width)
1798     if (N->getMaskElt(i) != i + Width - 1)
1799       return false;
1800 
1801   return true;
1802 }
1803 
/// Return true if this mask byte-reverses each halfword (XXBRH pattern).
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}
1807 
/// Return true if this mask byte-reverses each word (XXBRW pattern).
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}
1811 
/// Return true if this mask byte-reverses each doubleword (XXBRD pattern).
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}
1815 
/// Return true if this mask byte-reverses the whole quadword (XXBRQ pattern).
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}
1819 
1820 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
1821 /// if the inputs to the instruction should be swapped and set \p DM to the
1822 /// value for the immediate.
1823 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
1824 /// AND element 0 of the result comes from the first input (LE) or second input
1825 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
1826 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
1827 /// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  // M0/M1 are the doubleword numbers (0-3 across the two concatenated
  // inputs) selected for the two halves of the result.
  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      // LE complements the bits because it numbers the doublewords from the
      // other end of the register (cf. getVSPLTImmediate below).
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      // First half from the second input, second half from the first:
      // no swap needed under LE.
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      // Opposite arrangement: swap the inputs and renumber M0/M1 to refer
      // to the swapped operands.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      // First half from the first input, second half from the second:
      // the natural (unswapped) order under BE.
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      // Opposite arrangement: swap the inputs and renumber M0/M1.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
1879 
1880 
1881 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1882 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1883 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1884                                 SelectionDAG &DAG) {
1885   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1886   assert(isSplatShuffleMask(SVOp, EltSize));
1887   if (DAG.getDataLayout().isLittleEndian())
1888     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1889   else
1890     return SVOp->getMaskElt(0) / EltSize;
1891 }
1892 
1893 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1894 /// by using a vspltis[bhw] instruction of the specified element size, return
1895 /// the constant being splatted.  The ByteSize field indicates the number of
1896 /// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    // UniquedVals[k] records the agreed operand for position k within each
    // Multiple-sized chunk (null if only undefs were seen at that position).
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      // With all leading chunks zero, the logical value equals the last
      // chunk; it fits the immediate if it is a small non-negative number.
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      // With all leading chunks -1, sign-extend the last chunk and check it
      // fits the (negative side of the) 5-bit immediate.
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  // Extract the raw bits of the splatted value (integer or f32 bit pattern).
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
1996 
1997 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1998 /// amount, otherwise return -1.
1999 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2000   EVT VT = N->getValueType(0);
2001   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2002     return -1;
2003 
2004   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2005 
2006   // Find the first non-undef value in the shuffle mask.
2007   unsigned i;
2008   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2009     /*search*/;
2010 
2011   if (i == 4) return -1;  // all undef.
2012 
2013   // Otherwise, check to see if the rest of the elements are consecutively
2014   // numbered from this value.
2015   unsigned ShiftAmt = SVOp->getMaskElt(i);
2016   if (ShiftAmt < i) return -1;
2017   ShiftAmt -= i;
2018 
2019   // Check the rest of the elements to see if they are consecutive.
2020   for (++i; i != 4; ++i)
2021     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2022       return -1;
2023 
2024   return ShiftAmt;
2025 }
2026 
2027 //===----------------------------------------------------------------------===//
2028 //  Addressing Mode Selection
2029 //===----------------------------------------------------------------------===//
2030 
2031 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2032 /// or 64-bit immediate, and if the value can be accurately represented as a
2033 /// sign extension from a 16-bit value.  If so, this returns true and the
2034 /// immediate.
2035 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2036   if (!isa<ConstantSDNode>(N))
2037     return false;
2038 
2039   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2040   if (N->getValueType(0) == MVT::i32)
2041     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2042   else
2043     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2044 }
/// Convenience overload of isIntS16Immediate for SDValue operands.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
2048 
/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Prefer [r+imm] when the RHS fits a signed 16-bit displacement.
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    // Likewise for a PPCISD::Lo operand, which also folds as a displacement.
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown, RHSKnown;
    DAG.computeKnownBits(N.getOperand(0), LHSKnown);

    // Only bother computing the (potentially expensive) RHS known bits when
    // the LHS has at least one known-zero bit.
    if (LHSKnown.Zero.getBoolValue()) {
      DAG.computeKnownBits(N.getOperand(1), RHSKnown);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
2089 
2090 // If we happen to be doing an i64 load or store into a stack slot that has
2091 // less than a 4-byte alignment, then the frame-index elimination may need to
2092 // use an indexed load or store instruction (because the offset may not be a
2093 // multiple of 4). The extra register needed to hold the offset comes from the
2094 // register scavenger, and it is possible that the scavenger will need to use
2095 // an emergency spill slot. As a result, we need to make sure that a spill slot
2096 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2097 // stack slot.
2098 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2099   // FIXME: This does not handle the LWA case.
2100   if (VT != MVT::i64)
2101     return;
2102 
2103   // NOTE: We'll exclude negative FIs here, which come from argument
2104   // lowering, because there are no known test cases triggering this problem
2105   // using packed structures (or similar). We can remove this exclusion if
2106   // we find such a test case. The reason why this is so test-case driven is
2107   // because this entire 'fixup' is only to prevent crashes (from the
2108   // register scavenger) on not-really-valid inputs. For example, if we have:
2109   //   %a = alloca i1
2110   //   %b = bitcast i1* %a to i64*
2111   //   store i64* a, i64 b
2112   // then the store should really be marked as 'align 1', but is not. If it
2113   // were marked as 'align 1' then the indexed form would have been
2114   // instruction-selected initially, and the problem this 'fixup' is preventing
2115   // won't happen regardless.
2116   if (FrameIdx < 0)
2117     return;
2118 
2119   MachineFunction &MF = DAG.getMachineFunction();
2120   MachineFrameInfo &MFI = MF.getFrameInfo();
2121 
2122   unsigned Align = MFI.getObjectAlignment(FrameIdx);
2123   if (Align >= 4)
2124     return;
2125 
2126   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2127   FuncInfo->setHasNonRISpills();
2128 }
2129 
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
///
/// On success, Disp receives the displacement operand and Base the base
/// register (with frame indices rewritten to target frame indices).
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    // ADD of a base with a signed 16-bit constant matches [r+imm] directly
    // (subject to the multiple-of-4 restriction when Aligned is set).
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        // An i64 access to an under-aligned stack slot may need the
        // scavenger's emergency spill slot; make sure it is reserved.
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown;
      DAG.computeKnownBits(N.getOperand(0), LHSKnown);

      // Equivalent to ADD iff every set bit of imm lands in a known-zero bit
      // of the LHS, i.e. the OR cannot produce a carry if treated as an add.
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      // r0 reads as the constant zero when used as the base of a D-form
      // memory instruction, so [d, 0] addresses absolute memory.
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      // The high half must compensate for the sign extension of the low
      // 16-bit displacement, hence the subtraction before shifting.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: [r+0], rewriting frame indices to target frame indices.
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}
2230 
2231 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2232 /// represented as an indexed [r+r] operation.
2233 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2234                                                 SDValue &Index,
2235                                                 SelectionDAG &DAG) const {
2236   // Check to see if we can easily represent this as an [r+r] address.  This
2237   // will fail if it thinks that the address is more profitably represented as
2238   // reg+imm, e.g. where imm = 0.
2239   if (SelectAddressRegReg(N, Base, Index, DAG))
2240     return true;
2241 
2242   // If the operand is an addition, always emit this as [r+r], since this is
2243   // better (for code size, and execution, as the memop does the add for free)
2244   // than emitting an explicit add.
2245   if (N.getOpcode() == ISD::ADD) {
2246     Base = N.getOperand(0);
2247     Index = N.getOperand(1);
2248     return true;
2249   }
2250 
2251   // Otherwise, do it the hard way, using R0 as the base register.
2252   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2253                          N.getValueType());
2254   Index = N;
2255   return true;
2256 }
2257 
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
///
/// N is the load or store whose address is inspected; on success, Base and
/// Offset receive the two address components and AM is set to ISD::PRE_INC.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  // Pre-increment selection can be disabled wholesale via command line.
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  // Pull the base pointer, memory type, and alignment out of the memop;
  // anything that is neither a load nor a store cannot be pre-indexed.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors (except
  // for QPX, which does have preinc r+r forms).
  if (VT.isVector()) {
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
      return false;
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
      AM = ISD::PRE_INC;
      return true;
    }
  }

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    // Aligned=true restricts the displacement to a multiple of 4 (DS-form).
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
2341 
2342 //===----------------------------------------------------------------------===//
2343 //  LowerOperation implementation
2344 //===----------------------------------------------------------------------===//
2345 
2346 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2347 /// and LoOpFlags to the target MO flags.
2348 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2349                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2350                                const GlobalValue *GV = nullptr) {
2351   HiOpFlags = PPCII::MO_HA;
2352   LoOpFlags = PPCII::MO_LO;
2353 
2354   // Don't use the pic base if not in PIC relocation model.
2355   if (IsPIC) {
2356     HiOpFlags |= PPCII::MO_PIC_FLAG;
2357     LoOpFlags |= PPCII::MO_PIC_FLAG;
2358   }
2359 
2360   // If this is a reference to a global value that requires a non-lazy-ptr, make
2361   // sure that instruction lowering adds it.
2362   if (GV && Subtarget.hasLazyResolverStub(GV)) {
2363     HiOpFlags |= PPCII::MO_NLP_FLAG;
2364     LoOpFlags |= PPCII::MO_NLP_FLAG;
2365 
2366     if (GV->hasHiddenVisibility()) {
2367       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2368       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2369     }
2370   }
2371 }
2372 
2373 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2374                              SelectionDAG &DAG) {
2375   SDLoc DL(HiPart);
2376   EVT PtrVT = HiPart.getValueType();
2377   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2378 
2379   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2380   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2381 
2382   // With PIC, the first instruction is actually "GR+hi(&G)".
2383   if (isPIC)
2384     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2385                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2386 
2387   // Generate non-pic code that has direct accesses to the constant pool.
2388   // The address of the global is just (hi(&g)+lo(&g)).
2389   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2390 }
2391 
2392 static void setUsesTOCBasePtr(MachineFunction &MF) {
2393   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2394   FuncInfo->setUsesTOCBasePtr();
2395 }
2396 
2397 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2398   setUsesTOCBasePtr(DAG.getMachineFunction());
2399 }
2400 
2401 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2402                            SDValue GA) {
2403   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2404   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2405                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2406 
2407   SDValue Ops[] = { GA, Reg };
2408   return DAG.getMemIntrinsicNode(
2409       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2410       MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
2411       false, 0);
2412 }
2413 
2414 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2415                                              SelectionDAG &DAG) const {
2416   EVT PtrVT = Op.getValueType();
2417   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2418   const Constant *C = CP->getConstVal();
2419 
2420   // 64-bit SVR4 ABI code is always position-independent.
2421   // The actual address of the GlobalValue is stored in the TOC.
2422   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2423     setUsesTOCBasePtr(DAG);
2424     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2425     return getTOCEntry(DAG, SDLoc(CP), true, GA);
2426   }
2427 
2428   unsigned MOHiFlag, MOLoFlag;
2429   bool IsPIC = isPositionIndependent();
2430   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2431 
2432   if (IsPIC && Subtarget.isSVR4ABI()) {
2433     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2434                                            PPCII::MO_PIC_FLAG);
2435     return getTOCEntry(DAG, SDLoc(CP), false, GA);
2436   }
2437 
2438   SDValue CPIHi =
2439     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2440   SDValue CPILo =
2441     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2442   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2443 }
2444 
2445 // For 64-bit PowerPC, prefer the more compact relative encodings.
2446 // This trades 32 bits per jump table entry for one or two instructions
2447 // on the jump site.
2448 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2449   if (isJumpTableRelative())
2450     return MachineJumpTableInfo::EK_LabelDifference32;
2451 
2452   return TargetLowering::getJumpTableEncoding();
2453 }
2454 
2455 bool PPCTargetLowering::isJumpTableRelative() const {
2456   if (Subtarget.isPPC64())
2457     return true;
2458   return TargetLowering::isJumpTableRelative();
2459 }
2460 
2461 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2462                                                     SelectionDAG &DAG) const {
2463   if (!Subtarget.isPPC64())
2464     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2465 
2466   switch (getTargetMachine().getCodeModel()) {
2467   case CodeModel::Default:
2468   case CodeModel::Small:
2469   case CodeModel::Medium:
2470     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2471   default:
2472     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2473                        getPointerTy(DAG.getDataLayout()));
2474   }
2475 }
2476 
2477 const MCExpr *
2478 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2479                                                 unsigned JTI,
2480                                                 MCContext &Ctx) const {
2481   if (!Subtarget.isPPC64())
2482     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2483 
2484   switch (getTargetMachine().getCodeModel()) {
2485   case CodeModel::Default:
2486   case CodeModel::Small:
2487   case CodeModel::Medium:
2488     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2489   default:
2490     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2491   }
2492 }
2493 
2494 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2495   EVT PtrVT = Op.getValueType();
2496   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2497 
2498   // 64-bit SVR4 ABI code is always position-independent.
2499   // The actual address of the GlobalValue is stored in the TOC.
2500   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2501     setUsesTOCBasePtr(DAG);
2502     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2503     return getTOCEntry(DAG, SDLoc(JT), true, GA);
2504   }
2505 
2506   unsigned MOHiFlag, MOLoFlag;
2507   bool IsPIC = isPositionIndependent();
2508   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2509 
2510   if (IsPIC && Subtarget.isSVR4ABI()) {
2511     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2512                                         PPCII::MO_PIC_FLAG);
2513     return getTOCEntry(DAG, SDLoc(GA), false, GA);
2514   }
2515 
2516   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2517   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2518   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2519 }
2520 
2521 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2522                                              SelectionDAG &DAG) const {
2523   EVT PtrVT = Op.getValueType();
2524   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2525   const BlockAddress *BA = BASDN->getBlockAddress();
2526 
2527   // 64-bit SVR4 ABI code is always position-independent.
2528   // The actual BlockAddress is stored in the TOC.
2529   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2530     setUsesTOCBasePtr(DAG);
2531     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2532     return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2533   }
2534 
2535   unsigned MOHiFlag, MOLoFlag;
2536   bool IsPIC = isPositionIndependent();
2537   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2538   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2539   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2540   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2541 }
2542 
/// Lower a thread-local global address according to the TLS model chosen
/// for the global (local-exec, initial-exec, general-dynamic, or
/// local-dynamic).
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().Options.EmulatedTLS)
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    // Local-exec: the offset from the thread pointer is a link-time
    // constant; add its @tprel high/low halves to the thread-pointer
    // register (X13 on 64-bit, R2 on 32-bit).
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    // Initial-exec: load the thread-pointer offset from the GOT
    // (LD_GOT_TPREL_L off a GOT pointer), then add the thread pointer
    // via ADD_TLS using the MO_TLS-annotated address.
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      // 64-bit: the GOT pointer is built from the TOC base (X2).
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    // General-dynamic: form the GOT entry address and emit the combined
    // addi + __tls_get_addr call pseudo (ADDI_TLSGD_L_ADDR).
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                   GOTReg, TGA);
    } else {
      // 32-bit: small PIC uses the global base register directly; larger
      // PIC goes through the PIC GOT pseudo.
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    // Local-dynamic: get the module's TLS base via the combined
    // ADDI_TLSLD_L_ADDR pseudo, then add this variable's @dtprel offset
    // in two halves (ADDIS_DTPREL_HA + ADDI_DTPREL_L).
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
2632 
2633 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2634                                               SelectionDAG &DAG) const {
2635   EVT PtrVT = Op.getValueType();
2636   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2637   SDLoc DL(GSDN);
2638   const GlobalValue *GV = GSDN->getGlobal();
2639 
2640   // 64-bit SVR4 ABI code is always position-independent.
2641   // The actual address of the GlobalValue is stored in the TOC.
2642   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2643     setUsesTOCBasePtr(DAG);
2644     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2645     return getTOCEntry(DAG, DL, true, GA);
2646   }
2647 
2648   unsigned MOHiFlag, MOLoFlag;
2649   bool IsPIC = isPositionIndependent();
2650   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2651 
2652   if (IsPIC && Subtarget.isSVR4ABI()) {
2653     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2654                                             GSDN->getOffset(),
2655                                             PPCII::MO_PIC_FLAG);
2656     return getTOCEntry(DAG, DL, false, GA);
2657   }
2658 
2659   SDValue GAHi =
2660     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2661   SDValue GALo =
2662     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2663 
2664   SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2665 
2666   // If the global reference is actually to a non-lazy-pointer, we have to do an
2667   // extra load to get the address of the global.
2668   if (MOHiFlag & PPCII::MO_NLP_FLAG)
2669     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2670   return Ptr;
2671 }
2672 
2673 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2674   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2675   SDLoc dl(Op);
2676 
2677   if (Op.getValueType() == MVT::v2i64) {
2678     // When the operands themselves are v2i64 values, we need to do something
2679     // special because VSX has no underlying comparison operations for these.
2680     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2681       // Equality can be handled by casting to the legal type for Altivec
2682       // comparisons, everything else needs to be expanded.
2683       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2684         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2685                  DAG.getSetCC(dl, MVT::v4i32,
2686                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2687                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2688                    CC));
2689       }
2690 
2691       return SDValue();
2692     }
2693 
2694     // We handle most of these in the usual way.
2695     return Op;
2696   }
2697 
2698   // If we're comparing for equality to zero, expose the fact that this is
2699   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2700   // fold the new nodes.
2701   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2702     return V;
2703 
2704   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2705     // Leave comparisons against 0 and -1 alone for now, since they're usually
2706     // optimized.  FIXME: revisit this when we can custom lower all setcc
2707     // optimizations.
2708     if (C->isAllOnesValue() || C->isNullValue())
2709       return SDValue();
2710   }
2711 
2712   // If we have an integer seteq/setne, turn it into a compare against zero
2713   // by xor'ing the rhs with the lhs, which is faster than setting a
2714   // condition register, reading it back out, and masking the correct bit.  The
2715   // normal approach here uses sub to do this instead of xor.  Using xor exposes
2716   // the result to other bit-twiddling opportunities.
2717   EVT LHSVT = Op.getOperand(0).getValueType();
2718   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2719     EVT VT = Op.getValueType();
2720     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2721                                 Op.getOperand(1));
2722     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2723   }
2724   return SDValue();
2725 }
2726 
/// Lower VAARG for 32-bit SVR4.  The va_list layout implied by the offsets
/// used below: byte 0 holds the gpr index, byte 1 the fpr index, offset 4
/// the overflow-area pointer, and offset 8 the register-save-area pointer.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index: first byte of the va_list.
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // i64 arguments occupy an even/odd register pair, so the gpr index
    // must be rounded up to even first.
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  // Pointers to the register save area (offset 8) and the overflow area
  // (offset 4) inside the va_list struct.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8
  // CC is true (use the register save area) while the relevant index is
  // still below 8.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  // Store the bumped index back into the byte it came from (gpr index at
  // VAListPtr, fpr index at FprPtr).
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          dl, MVT::i32));

  // Only advance the overflow area when the argument actually came from it
  // (CC false keeps the bumped pointer).
  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  // Finally load the argument itself from the selected address.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
2825 
2826 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2827   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2828 
2829   // We have to copy the entire va_list struct:
2830   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
2831   return DAG.getMemcpy(Op.getOperand(0), Op,
2832                        Op.getOperand(1), Op.getOperand(2),
2833                        DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2834                        false, MachinePointerInfo(), MachinePointerInfo());
2835 }
2836 
2837 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2838                                                   SelectionDAG &DAG) const {
2839   return Op.getOperand(0);
2840 }
2841 
2842 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2843                                                 SelectionDAG &DAG) const {
2844   SDValue Chain = Op.getOperand(0);
2845   SDValue Trmp = Op.getOperand(1); // trampoline
2846   SDValue FPtr = Op.getOperand(2); // nested function
2847   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2848   SDLoc dl(Op);
2849 
2850   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2851   bool isPPC64 = (PtrVT == MVT::i64);
2852   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2853 
2854   TargetLowering::ArgListTy Args;
2855   TargetLowering::ArgListEntry Entry;
2856 
2857   Entry.Ty = IntPtrTy;
2858   Entry.Node = Trmp; Args.push_back(Entry);
2859 
2860   // TrampSize == (isPPC64 ? 48 : 40);
2861   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2862                                isPPC64 ? MVT::i64 : MVT::i32);
2863   Args.push_back(Entry);
2864 
2865   Entry.Node = FPtr; Args.push_back(Entry);
2866   Entry.Node = Nest; Args.push_back(Entry);
2867 
2868   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2869   TargetLowering::CallLoweringInfo CLI(DAG);
2870   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2871       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2872       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
2873 
2874   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2875   return CallResult.second;
2876 }
2877 
// Lower ISD::VASTART.  On Darwin and 64-bit targets this is a single store
// of the vararg frame address into the va_list slot; on 32-bit SVR4 it
// initializes all four fields of the va_list struct described below.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Distance from overflow_arg_area to reg_save_area: one pointer (4 bytes
  // here, since this path is only reached on 32-bit targets).
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  // Distance from the fpr byte (offset 1) to overflow_arg_area (offset 4):
  // 4 - 1 = 3 bytes, which also covers the 2 bytes of struct padding.
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  // Distance from the gpr byte (offset 0) to the fpr byte (offset 1).
  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // The four stores below are chained so they occur in order; nextPtr and
  // nextOffset track the current field address and its struct offset.

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
2961 
2962 #include "PPCGenCallingConv.inc"
2963 
2964 // Function whose sole purpose is to kill compiler warnings
2965 // stemming from unused functions included from PPCGenCallingConv.inc.
2966 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2967   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2968 }
2969 
// No-op custom calling-convention handler.  Returning true tells the
// generated CC machinery this value has been fully handled, so nothing
// further is allocated for it.
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  return true;
}
2976 
2977 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2978                                              MVT &LocVT,
2979                                              CCValAssign::LocInfo &LocInfo,
2980                                              ISD::ArgFlagsTy &ArgFlags,
2981                                              CCState &State) {
2982   static const MCPhysReg ArgRegs[] = {
2983     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2984     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2985   };
2986   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2987 
2988   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2989 
2990   // Skip one register if the first unallocated register has an even register
2991   // number and there are still argument registers available which have not been
2992   // allocated yet. RegNum is actually an index into ArgRegs, which means we
2993   // need to skip a register if RegNum is odd.
2994   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2995     State.AllocateReg(ArgRegs[RegNum]);
2996   }
2997 
2998   // Always return false here, as this function only makes sure that the first
2999   // unallocated register has an odd register number and does not actually
3000   // allocate a register for the current argument.
3001   return false;
3002 }
3003 
3004 bool
3005 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
3006                                                   MVT &LocVT,
3007                                                   CCValAssign::LocInfo &LocInfo,
3008                                                   ISD::ArgFlagsTy &ArgFlags,
3009                                                   CCState &State) {
3010   static const MCPhysReg ArgRegs[] = {
3011     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3012     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3013   };
3014   const unsigned NumArgRegs = array_lengthof(ArgRegs);
3015 
3016   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3017   int RegsLeft = NumArgRegs - RegNum;
3018 
3019   // Skip if there is not enough registers left for long double type (4 gpr regs
3020   // in soft float mode) and put long double argument on the stack.
3021   if (RegNum != NumArgRegs && RegsLeft < 4) {
3022     for (int i = 0; i < RegsLeft; i++) {
3023       State.AllocateReg(ArgRegs[RegNum + i]);
3024     }
3025   }
3026 
3027   return false;
3028 }
3029 
3030 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
3031                                                MVT &LocVT,
3032                                                CCValAssign::LocInfo &LocInfo,
3033                                                ISD::ArgFlagsTy &ArgFlags,
3034                                                CCState &State) {
3035   static const MCPhysReg ArgRegs[] = {
3036     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3037     PPC::F8
3038   };
3039 
3040   const unsigned NumArgRegs = array_lengthof(ArgRegs);
3041 
3042   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3043 
3044   // If there is only one Floating-point register left we need to put both f64
3045   // values of a split ppc_fp128 value on the stack.
3046   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
3047     State.AllocateReg(ArgRegs[RegNum]);
3048   }
3049 
3050   // Always return false here, as this function only makes sure that the two f64
3051   // values a ppc_fp128 value is split into are both passed in registers or both
3052   // passed on the stack and does not actually allocate a register for the
3053   // current argument.
3054   return false;
3055 }
3056 
/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.  Thirteen registers, f1-f13.
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};

/// QFPR - The set of QPX registers that should be allocated for arguments.
/// Mirrors FPR above: qf1-qf13.
static const MCPhysReg QFPR[] = {
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3067 
3068 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3069 /// the stack.
3070 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3071                                        unsigned PtrByteSize) {
3072   unsigned ArgSize = ArgVT.getStoreSize();
3073   if (Flags.isByVal())
3074     ArgSize = Flags.getByValSize();
3075 
3076   // Round up to multiples of the pointer size, except for array members,
3077   // which are always packed.
3078   if (!Flags.isInConsecutiveRegs())
3079     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3080 
3081   return ArgSize;
3082 }
3083 
3084 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3085 /// on the stack.
3086 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3087                                             ISD::ArgFlagsTy Flags,
3088                                             unsigned PtrByteSize) {
3089   unsigned Align = PtrByteSize;
3090 
3091   // Altivec parameters are padded to a 16 byte boundary.
3092   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3093       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3094       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3095       ArgVT == MVT::v1i128)
3096     Align = 16;
3097   // QPX vector types stored in double-precision are padded to a 32 byte
3098   // boundary.
3099   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3100     Align = 32;
3101 
3102   // ByVal parameters are aligned as requested.
3103   if (Flags.isByVal()) {
3104     unsigned BVAlign = Flags.getByValAlign();
3105     if (BVAlign > PtrByteSize) {
3106       if (BVAlign % PtrByteSize != 0)
3107           llvm_unreachable(
3108             "ByVal alignment is not a multiple of the pointer size");
3109 
3110       Align = BVAlign;
3111     }
3112   }
3113 
3114   // Array members are always packed to their original alignment.
3115   if (Flags.isInConsecutiveRegs()) {
3116     // If the array member was split into multiple registers, the first
3117     // needs to be aligned to the size of the full type.  (Except for
3118     // ppcf128, which is only aligned as its f64 components.)
3119     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3120       Align = OrigVT.getStoreSize();
3121     else
3122       Align = ArgVT.getStoreSize();
3123   }
3124 
3125   return Align;
3126 }
3127 
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
///
/// Note that ArgOffset is advanced unconditionally — a stack slot is
/// reserved for the argument even when it ends up travelling in a register
/// (the early returns below).
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize,
                                   unsigned LinkageSize,
                                   unsigned ParamAreaSize,
                                   unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs, bool HasQPX) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  unsigned Align =
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    // The last piece of a split array member is padded out to a full slot.
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.  Each early return consumes one
  // register of the matching class and reports no stack use.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
        // QPX registers overlap with the scalar FP registers.
        (HasQPX && (ArgVT == MVT::v4f32 ||
                    ArgVT == MVT::v4f64 ||
                    ArgVT == MVT::v4i1)))
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
3184 
3185 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3186 /// ensure minimum alignment required for target.
3187 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3188                                      unsigned NumBytes) {
3189   unsigned TargetAlign = Lowering->getStackAlignment();
3190   unsigned AlignMask = TargetAlign - 1;
3191   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3192   return NumBytes;
3193 }
3194 
3195 SDValue PPCTargetLowering::LowerFormalArguments(
3196     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3197     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3198     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3199   if (Subtarget.isSVR4ABI()) {
3200     if (Subtarget.isPPC64())
3201       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3202                                          dl, DAG, InVals);
3203     else
3204       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3205                                          dl, DAG, InVals);
3206   } else {
3207     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3208                                        dl, DAG, InVals);
3209   }
3210 }
3211 
// Lower incoming (formal) arguments for the 32-bit SVR4 ABI.  Scalar and
// vector arguments arrive in registers or stack slots as assigned by
// CC_PPC32_SVR4; by-value aggregates live in the caller's local variable
// space (analyzed separately with CC_PPC32_SVR4_ByVal); and for varargs the
// fixed GPR/FPR argument registers are spilled into a frame object so that
// va_arg can later walk the register save area.
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  // NOTE(review): in soft-float mode the pre-analysis appears to record
  // per-argument ppc_fp128 origin in PPCCCState for the custom CC handlers
  // above — confirm against PPCCCState's definition.
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  // Reset the bookkeeping established by PreAnalyzeFormalArguments.
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      // Pick the register class that matches the value type (preferring the
      // VSX/P8Vector classes when the subtarget has them).
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f64:
          RC = &PPC::QFRCRegClass;
          break;
        case MVT::v4i1:
          RC = &PPC::QBRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual ones.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      // i1 values arrive widened in a GPR; copy them out as i32 and truncate.
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);

      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      unsigned ArgSize = VA.getLocVT().getStoreSize();
      int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                     isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    // With soft float, no FP argument registers are saved.
    if (useSoftFloat())
       NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs: 4 bytes per GPR plus
    // 8 bytes per saved FPR.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    // A fixed object at the current stack offset records where overflow
    // (stack-passed) arguments begin; va_start stores its address.
    FuncInfo->setVarArgsStackOffset(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Join all the register-save stores into the returned chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
3443 
3444 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3445 // value to MVT::i64 and then truncate to the correct register size.
3446 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3447                                              EVT ObjectVT, SelectionDAG &DAG,
3448                                              SDValue ArgVal,
3449                                              const SDLoc &dl) const {
3450   if (Flags.isSExt())
3451     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3452                          DAG.getValueType(ObjectVT));
3453   else if (Flags.isZExt())
3454     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3455                          DAG.getValueType(ObjectVT));
3456 
3457   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3458 }
3459 
3460 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3461     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3462     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3463     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3464   // TODO: add description of PPC stack frame format, or at least some docs.
3465   //
3466   bool isELFv2ABI = Subtarget.isELFv2ABI();
3467   bool isLittleEndian = Subtarget.isLittleEndian();
3468   MachineFunction &MF = DAG.getMachineFunction();
3469   MachineFrameInfo &MFI = MF.getFrameInfo();
3470   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3471 
3472   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3473          "fastcc not supported on varargs functions");
3474 
3475   EVT PtrVT = getPointerTy(MF.getDataLayout());
3476   // Potential tail calls could cause overwriting of argument stack slots.
3477   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3478                        (CallConv == CallingConv::Fast));
3479   unsigned PtrByteSize = 8;
3480   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3481 
3482   static const MCPhysReg GPR[] = {
3483     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3484     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3485   };
3486   static const MCPhysReg VR[] = {
3487     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3488     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3489   };
3490 
3491   const unsigned Num_GPR_Regs = array_lengthof(GPR);
3492   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3493   const unsigned Num_VR_Regs  = array_lengthof(VR);
3494   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3495 
3496   // Do a first pass over the arguments to determine whether the ABI
3497   // guarantees that our caller has allocated the parameter save area
3498   // on its stack frame.  In the ELFv1 ABI, this is always the case;
3499   // in the ELFv2 ABI, it is true if this is a vararg function or if
3500   // any parameter is located in a stack slot.
3501 
3502   bool HasParameterArea = !isELFv2ABI || isVarArg;
3503   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3504   unsigned NumBytes = LinkageSize;
3505   unsigned AvailableFPRs = Num_FPR_Regs;
3506   unsigned AvailableVRs = Num_VR_Regs;
3507   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3508     if (Ins[i].Flags.isNest())
3509       continue;
3510 
3511     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3512                                PtrByteSize, LinkageSize, ParamAreaSize,
3513                                NumBytes, AvailableFPRs, AvailableVRs,
3514                                Subtarget.hasQPX()))
3515       HasParameterArea = true;
3516   }
3517 
3518   // Add DAG nodes to load the arguments or copy them out of registers.  On
3519   // entry to a function on PPC, the arguments start after the linkage area,
3520   // although the first ones are often in registers.
3521 
3522   unsigned ArgOffset = LinkageSize;
3523   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3524   unsigned &QFPR_idx = FPR_idx;
3525   SmallVector<SDValue, 8> MemOps;
3526   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3527   unsigned CurArgIdx = 0;
3528   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3529     SDValue ArgVal;
3530     bool needsLoad = false;
3531     EVT ObjectVT = Ins[ArgNo].VT;
3532     EVT OrigVT = Ins[ArgNo].ArgVT;
3533     unsigned ObjSize = ObjectVT.getStoreSize();
3534     unsigned ArgSize = ObjSize;
3535     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3536     if (Ins[ArgNo].isOrigArg()) {
3537       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3538       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3539     }
3540     // We re-align the argument offset for each argument, except when using the
3541     // fast calling convention, when we need to make sure we do that only when
3542     // we'll actually use a stack slot.
3543     unsigned CurArgOffset, Align;
3544     auto ComputeArgOffset = [&]() {
3545       /* Respect alignment of argument on the stack.  */
3546       Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3547       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3548       CurArgOffset = ArgOffset;
3549     };
3550 
3551     if (CallConv != CallingConv::Fast) {
3552       ComputeArgOffset();
3553 
3554       /* Compute GPR index associated with argument offset.  */
3555       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3556       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3557     }
3558 
3559     // FIXME the codegen can be much improved in some cases.
3560     // We do not have to keep everything in memory.
3561     if (Flags.isByVal()) {
3562       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3563 
3564       if (CallConv == CallingConv::Fast)
3565         ComputeArgOffset();
3566 
3567       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3568       ObjSize = Flags.getByValSize();
3569       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3570       // Empty aggregate parameters do not take up registers.  Examples:
3571       //   struct { } a;
3572       //   union  { } b;
3573       //   int c[0];
3574       // etc.  However, we have to provide a place-holder in InVals, so
3575       // pretend we have an 8-byte item at the current address for that
3576       // purpose.
3577       if (!ObjSize) {
3578         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3579         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3580         InVals.push_back(FIN);
3581         continue;
3582       }
3583 
3584       // Create a stack object covering all stack doublewords occupied
3585       // by the argument.  If the argument is (fully or partially) on
3586       // the stack, or if the argument is fully in registers but the
3587       // caller has allocated the parameter save anyway, we can refer
3588       // directly to the caller's stack frame.  Otherwise, create a
3589       // local copy in our own frame.
3590       int FI;
3591       if (HasParameterArea ||
3592           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3593         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3594       else
3595         FI = MFI.CreateStackObject(ArgSize, Align, false);
3596       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3597 
3598       // Handle aggregates smaller than 8 bytes.
3599       if (ObjSize < PtrByteSize) {
3600         // The value of the object is its address, which differs from the
3601         // address of the enclosing doubleword on big-endian systems.
3602         SDValue Arg = FIN;
3603         if (!isLittleEndian) {
3604           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3605           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3606         }
3607         InVals.push_back(Arg);
3608 
3609         if (GPR_idx != Num_GPR_Regs) {
3610           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3611           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3612           SDValue Store;
3613 
3614           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3615             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3616                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
3617             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3618                                       MachinePointerInfo(&*FuncArg), ObjType);
3619           } else {
3620             // For sizes that don't fit a truncating store (3, 5, 6, 7),
3621             // store the whole register as-is to the parameter save area
3622             // slot.
3623             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3624                                  MachinePointerInfo(&*FuncArg));
3625           }
3626 
3627           MemOps.push_back(Store);
3628         }
3629         // Whether we copied from a register or not, advance the offset
3630         // into the parameter save area by a full doubleword.
3631         ArgOffset += PtrByteSize;
3632         continue;
3633       }
3634 
3635       // The value of the object is its address, which is the address of
3636       // its first stack doubleword.
3637       InVals.push_back(FIN);
3638 
3639       // Store whatever pieces of the object are in registers to memory.
3640       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3641         if (GPR_idx == Num_GPR_Regs)
3642           break;
3643 
3644         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3645         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3646         SDValue Addr = FIN;
3647         if (j) {
3648           SDValue Off = DAG.getConstant(j, dl, PtrVT);
3649           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3650         }
3651         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3652                                      MachinePointerInfo(&*FuncArg, j));
3653         MemOps.push_back(Store);
3654         ++GPR_idx;
3655       }
3656       ArgOffset += ArgSize;
3657       continue;
3658     }
3659 
3660     switch (ObjectVT.getSimpleVT().SimpleTy) {
3661     default: llvm_unreachable("Unhandled argument type!");
3662     case MVT::i1:
3663     case MVT::i32:
3664     case MVT::i64:
3665       if (Flags.isNest()) {
3666         // The 'nest' parameter, if any, is passed in R11.
3667         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3668         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3669 
3670         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3671           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3672 
3673         break;
3674       }
3675 
3676       // These can be scalar arguments or elements of an integer array type
3677       // passed directly.  Clang may use those instead of "byval" aggregate
3678       // types to avoid forcing arguments to memory unnecessarily.
3679       if (GPR_idx != Num_GPR_Regs) {
3680         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3681         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3682 
3683         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3684           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3685           // value to MVT::i64 and then truncate to the correct register size.
3686           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3687       } else {
3688         if (CallConv == CallingConv::Fast)
3689           ComputeArgOffset();
3690 
3691         needsLoad = true;
3692         ArgSize = PtrByteSize;
3693       }
3694       if (CallConv != CallingConv::Fast || needsLoad)
3695         ArgOffset += 8;
3696       break;
3697 
3698     case MVT::f32:
3699     case MVT::f64:
3700       // These can be scalar arguments or elements of a float array type
3701       // passed directly.  The latter are used to implement ELFv2 homogenous
3702       // float aggregates.
3703       if (FPR_idx != Num_FPR_Regs) {
3704         unsigned VReg;
3705 
3706         if (ObjectVT == MVT::f32)
3707           VReg = MF.addLiveIn(FPR[FPR_idx],
3708                               Subtarget.hasP8Vector()
3709                                   ? &PPC::VSSRCRegClass
3710                                   : &PPC::F4RCRegClass);
3711         else
3712           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3713                                                 ? &PPC::VSFRCRegClass
3714                                                 : &PPC::F8RCRegClass);
3715 
3716         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3717         ++FPR_idx;
3718       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3719         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3720         // once we support fp <-> gpr moves.
3721 
3722         // This can only ever happen in the presence of f32 array types,
3723         // since otherwise we never run out of FPRs before running out
3724         // of GPRs.
3725         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3726         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3727 
3728         if (ObjectVT == MVT::f32) {
3729           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3730             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3731                                  DAG.getConstant(32, dl, MVT::i32));
3732           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3733         }
3734 
3735         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3736       } else {
3737         if (CallConv == CallingConv::Fast)
3738           ComputeArgOffset();
3739 
3740         needsLoad = true;
3741       }
3742 
3743       // When passing an array of floats, the array occupies consecutive
3744       // space in the argument area; only round up to the next doubleword
3745       // at the end of the array.  Otherwise, each float takes 8 bytes.
3746       if (CallConv != CallingConv::Fast || needsLoad) {
3747         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3748         ArgOffset += ArgSize;
3749         if (Flags.isInConsecutiveRegsLast())
3750           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3751       }
3752       break;
3753     case MVT::v4f32:
3754     case MVT::v4i32:
3755     case MVT::v8i16:
3756     case MVT::v16i8:
3757     case MVT::v2f64:
3758     case MVT::v2i64:
3759     case MVT::v1i128:
3760       if (!Subtarget.hasQPX()) {
3761       // These can be scalar arguments or elements of a vector array type
3762       // passed directly.  The latter are used to implement ELFv2 homogenous
3763       // vector aggregates.
3764       if (VR_idx != Num_VR_Regs) {
3765         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3766         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3767         ++VR_idx;
3768       } else {
3769         if (CallConv == CallingConv::Fast)
3770           ComputeArgOffset();
3771 
3772         needsLoad = true;
3773       }
3774       if (CallConv != CallingConv::Fast || needsLoad)
3775         ArgOffset += 16;
3776       break;
3777       } // not QPX
3778 
3779       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3780              "Invalid QPX parameter type");
3781       /* fall through */
3782 
3783     case MVT::v4f64:
3784     case MVT::v4i1:
3785       // QPX vectors are treated like their scalar floating-point subregisters
3786       // (except that they're larger).
3787       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3788       if (QFPR_idx != Num_QFPR_Regs) {
3789         const TargetRegisterClass *RC;
3790         switch (ObjectVT.getSimpleVT().SimpleTy) {
3791         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3792         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3793         default:         RC = &PPC::QBRCRegClass; break;
3794         }
3795 
3796         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3797         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3798         ++QFPR_idx;
3799       } else {
3800         if (CallConv == CallingConv::Fast)
3801           ComputeArgOffset();
3802         needsLoad = true;
3803       }
3804       if (CallConv != CallingConv::Fast || needsLoad)
3805         ArgOffset += Sz;
3806       break;
3807     }
3808 
3809     // We need to load the argument to a virtual register if we determined
3810     // above that we ran out of physical registers of the appropriate type.
3811     if (needsLoad) {
3812       if (ObjSize < ArgSize && !isLittleEndian)
3813         CurArgOffset += ArgSize - ObjSize;
3814       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3815       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3816       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3817     }
3818 
3819     InVals.push_back(ArgVal);
3820   }
3821 
3822   // Area that is at least reserved in the caller of this function.
3823   unsigned MinReservedArea;
3824   if (HasParameterArea)
3825     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3826   else
3827     MinReservedArea = LinkageSize;
3828 
3829   // Set the size that is at least reserved in caller of this function.  Tail
3830   // call optimized functions' reserved stack space needs to be aligned so that
3831   // taking the difference between two stack areas will result in an aligned
3832   // stack.
3833   MinReservedArea =
3834       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3835   FuncInfo->setMinReservedArea(MinReservedArea);
3836 
3837   // If the function takes variable number of arguments, make a frame index for
3838   // the start of the first vararg value... for expansion of llvm.va_start.
3839   if (isVarArg) {
3840     int Depth = ArgOffset;
3841 
3842     FuncInfo->setVarArgsFrameIndex(
3843       MFI.CreateFixedObject(PtrByteSize, Depth, true));
3844     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3845 
3846     // If this function is vararg, store any remaining integer argument regs
3847     // to their spots on the stack so that they may be loaded by dereferencing
3848     // the result of va_next.
3849     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3850          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3851       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3852       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3853       SDValue Store =
3854           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3855       MemOps.push_back(Store);
3856       // Increment the address by four for the next argument to store
3857       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
3858       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3859     }
3860   }
3861 
3862   if (!MemOps.empty())
3863     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3864 
3865   return Chain;
3866 }
3867 
/// LowerFormalArguments_Darwin - Lower incoming formal arguments for the
/// Darwin (32-/64-bit PowerPC) ABI.  Arguments that arrive in physical
/// registers are copied into virtual registers via CopyFromReg; arguments
/// that arrive (or partially arrive) on the stack are materialized as loads
/// from fixed frame objects in the caller's parameter save area.  One
/// SDValue per entry of \p Ins is appended to \p InVals, and the (possibly
/// token-factored) \p Chain is returned.
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  // Running offset into the parameter save area; starts just past the
  // linkage area.
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  // Indices of the next unallocated register of each class.
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  // Count of Altivec params pushed to the end (32-bit non-varargs case only).
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      // Keep FuncArg in sync with the original IR argument this lowered
      // piece came from (several Ins entries may map to one IR argument).
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        // Small byvals passed in a GPR: spill the register to the frame slot
        // with a truncating store of the right width.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the remainder of the aggregate is already in
          // memory; just account for the space it occupies.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          // Align to a 16-byte boundary, consuming GPRs that cover the
          // padding, then account for the vector itself.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // ArgSize - ObjSize accounts for right-justification of small objects
      // within their (larger) slot.
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Chain all argument-spill stores together so later DAG nodes depend on
  // every one of them.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4227 
4228 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4229 /// adjusted to accommodate the arguments for the tailcall.
4230 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4231                                    unsigned ParamSize) {
4232 
4233   if (!isTailCall) return 0;
4234 
4235   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4236   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4237   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4238   // Remember only if the new adjustement is bigger.
4239   if (SPDiff < FI->getTailCallSPDelta())
4240     FI->setTailCallSPDelta(SPDiff);
4241 
4242   return SPDiff;
4243 }
4244 
4245 static bool isFunctionGlobalAddress(SDValue Callee);
4246 
4247 static bool
4248 resideInSameSection(const Function *Caller, SDValue Callee,
4249                     const TargetMachine &TM) {
4250   // If !G, Callee can be an external symbol.
4251   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4252   if (!G)
4253     return false;
4254 
4255   const GlobalValue *GV = G->getGlobal();
4256   if (!GV->isStrongDefinitionForLinker())
4257     return false;
4258 
4259   // Any explicitly-specified sections and section prefixes must also match.
4260   // Also, if we're using -ffunction-sections, then each function is always in
4261   // a different section (the same is true for COMDAT functions).
4262   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4263       GV->getSection() != Caller->getSection())
4264     return false;
4265   if (const auto *F = dyn_cast<Function>(GV)) {
4266     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4267       return false;
4268   }
4269 
4270   // If the callee might be interposed, then we can't assume the ultimate call
4271   // target will be in the same section. Even in cases where we can assume that
4272   // interposition won't happen, in any case where the linker might insert a
4273   // stub to allow for interposition, we must generate code as though
4274   // interposition might occur. To understand why this matters, consider a
4275   // situation where: a -> b -> c where the arrows indicate calls. b and c are
4276   // in the same section, but a is in a different module (i.e. has a different
4277   // TOC base pointer). If the linker allows for interposition between b and c,
4278   // then it will generate a stub for the call edge between b and c which will
4279   // save the TOC pointer into the designated stack slot allocated by b. If we
4280   // return true here, and therefore allow a tail call between b and c, that
4281   // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
4282   // pointer into the stack slot allocated by a (where the a -> b stub saved
4283   // a's TOC base pointer). If we're not considering a tail call, but rather,
4284   // whether a nop is needed after the call instruction in b, because the linker
4285   // will insert a stub, it might complain about a missing nop if we omit it
4286   // (although many don't complain in this case).
4287   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4288     return false;
4289 
4290   return true;
4291 }
4292 
4293 static bool
4294 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4295                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4296   assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
4297 
4298   const unsigned PtrByteSize = 8;
4299   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4300 
4301   static const MCPhysReg GPR[] = {
4302     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4303     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4304   };
4305   static const MCPhysReg VR[] = {
4306     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4307     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4308   };
4309 
4310   const unsigned NumGPRs = array_lengthof(GPR);
4311   const unsigned NumFPRs = 13;
4312   const unsigned NumVRs = array_lengthof(VR);
4313   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4314 
4315   unsigned NumBytes = LinkageSize;
4316   unsigned AvailableFPRs = NumFPRs;
4317   unsigned AvailableVRs = NumVRs;
4318 
4319   for (const ISD::OutputArg& Param : Outs) {
4320     if (Param.Flags.isNest()) continue;
4321 
4322     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4323                                PtrByteSize, LinkageSize, ParamAreaSize,
4324                                NumBytes, AvailableFPRs, AvailableVRs,
4325                                Subtarget.hasQPX()))
4326       return true;
4327   }
4328   return false;
4329 }
4330 
4331 static bool
4332 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
4333   if (CS->arg_size() != CallerFn->arg_size())
4334     return false;
4335 
4336   ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
4337   ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
4338   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4339 
4340   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4341     const Value* CalleeArg = *CalleeArgIter;
4342     const Value* CallerArg = &(*CallerArgIter);
4343     if (CalleeArg == CallerArg)
4344       continue;
4345 
4346     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4347     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4348     //      }
4349     // 1st argument of callee is undef and has the same type as caller.
4350     if (CalleeArg->getType() == CallerArg->getType() &&
4351         isa<UndefValue>(CalleeArg))
4352       continue;
4353 
4354     return false;
4355   }
4356 
4357   return true;
4358 }
4359 
/// Check whether this call is eligible for tail call (TCO) or sibling call
/// (SCO) optimization under the 64-bit SVR4 ABI.  The checks are ordered
/// cheapest-first; every failed check returns false.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite *CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // SCO may be explicitly disabled; guaranteed TCO still proceeds regardless.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();

  // Tail or sibling call optimization (TCO/SCO) requires the callee and
  // caller to use the same calling convention.
  if (CallerCC != CalleeCC) return false;

  // SCO supports only the C and Fast calling conventions.
  if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
    return false;

  // A caller with any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // A callee with any byval parameter is not supported either.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // No TCO/SCO on indirect calls because the caller has to restore its TOC.
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Check whether Callee resides in the same section, because for now the
  // PPC64 SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that
  // resides in another section.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If the callee uses the same argument list the caller is using, then we
  // can apply SCO in this case. If not, we need to check whether the callee
  // needs stack slots for passing arguments.
  if (!hasSameArgumentList(MF.getFunction(), CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}
4426 
4427 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4428 /// for tail call optimization. Targets which want to do tail call
4429 /// optimization should implement this function.
4430 bool
4431 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4432                                                      CallingConv::ID CalleeCC,
4433                                                      bool isVarArg,
4434                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4435                                                      SelectionDAG& DAG) const {
4436   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4437     return false;
4438 
4439   // Variable argument functions are not supported.
4440   if (isVarArg)
4441     return false;
4442 
4443   MachineFunction &MF = DAG.getMachineFunction();
4444   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4445   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4446     // Functions containing by val parameters are not supported.
4447     for (unsigned i = 0; i != Ins.size(); i++) {
4448        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4449        if (Flags.isByVal()) return false;
4450     }
4451 
4452     // Non-PIC/GOT tail calls are supported.
4453     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4454       return true;
4455 
4456     // At the moment we can only do local tail calls (in same module, hidden
4457     // or protected) if we are generating PIC.
4458     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4459       return G->getGlobal()->hasHiddenVisibility()
4460           || G->getGlobal()->hasProtectedVisibility();
4461   }
4462 
4463   return false;
4464 }
4465 
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction,
/// or null if it is not.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  // Drop the implicitly-zero low 2 bits from the encoded immediate.
  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}
4483 
namespace {

/// Describes one outgoing tail-call argument together with the fixed stack
/// slot it must eventually be stored to.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index node addressing the destination slot.
  int FrameIdx = 0;   // Raw frame index of the destination slot.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
4495 
4496 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4497 static void StoreTailCallArgumentsToStackSlot(
4498     SelectionDAG &DAG, SDValue Chain,
4499     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4500     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4501   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4502     SDValue Arg = TailCallArgs[i].Arg;
4503     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4504     int FI = TailCallArgs[i].FrameIdx;
4505     // Store relative to framepointer.
4506     MemOpChains.push_back(DAG.getStore(
4507         Chain, dl, Arg, FIN,
4508         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4509   }
4510 }
4511 
4512 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4513 /// the appropriate stack slot for the tail call optimized function call.
4514 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4515                                              SDValue OldRetAddr, SDValue OldFP,
4516                                              int SPDiff, const SDLoc &dl) {
4517   if (SPDiff) {
4518     // Calculate the new stack slot for the return address.
4519     MachineFunction &MF = DAG.getMachineFunction();
4520     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4521     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4522     bool isPPC64 = Subtarget.isPPC64();
4523     int SlotSize = isPPC64 ? 8 : 4;
4524     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4525     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4526                                                          NewRetAddrLoc, true);
4527     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4528     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4529     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4530                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4531 
4532     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4533     // slot as the FP is never overwritten.
4534     if (Subtarget.isDarwinABI()) {
4535       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4536       int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4537                                                          true);
4538       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4539       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4540                            MachinePointerInfo::getFixedStack(
4541                                DAG.getMachineFunction(), NewFPIdx));
4542     }
4543   }
4544   return Chain;
4545 }
4546 
4547 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4548 /// the position of the argument.
4549 static void
4550 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4551                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4552                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4553   int Offset = ArgOffset + SPDiff;
4554   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4555   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4556   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4557   SDValue FIN = DAG.getFrameIndex(FI, VT);
4558   TailCallArgumentInfo Info;
4559   Info.Arg = Arg;
4560   Info.FrameIdxOp = FIN;
4561   Info.FrameIdx = FI;
4562   TailCallArguments.push_back(Info);
4563 }
4564 
4565 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4566 /// stack slot. Returns the chain as result and the loaded frame pointers in
4567 /// LROpOut/FPOpout. Used when tail calling.
4568 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4569     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4570     SDValue &FPOpOut, const SDLoc &dl) const {
4571   if (SPDiff) {
4572     // Load the LR and FP stack slot for later adjusting.
4573     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4574     LROpOut = getReturnAddrFrameIndex(DAG);
4575     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4576     Chain = SDValue(LROpOut.getNode(), 1);
4577 
4578     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4579     // slot as the FP is never overwritten.
4580     if (Subtarget.isDarwinABI()) {
4581       FPOpOut = getFramePointerFrameIndex(DAG);
4582       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4583       Chain = SDValue(FPOpOut.getNode(), 1);
4584     }
4585   }
4586   return Chain;
4587 }
4588 
4589 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4590 /// by "Src" to address "Dst" of size "Size".  Alignment information is
4591 /// specified by the specific parameter attribute. The copy will be passed as
4592 /// a byval function parameter.
4593 /// Sometimes what we are copying is the end of a larger object, the part that
4594 /// does not fit in registers.
4595 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4596                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4597                                          SelectionDAG &DAG, const SDLoc &dl) {
4598   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4599   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4600                        false, false, false, MachinePointerInfo(),
4601                        MachinePointerInfo());
4602 }
4603 
4604 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4605 /// tail calls.
4606 static void LowerMemOpCallTo(
4607     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4608     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4609     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4610     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4611   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4612   if (!isTailCall) {
4613     if (isVector) {
4614       SDValue StackPtr;
4615       if (isPPC64)
4616         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4617       else
4618         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4619       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4620                            DAG.getConstant(ArgOffset, dl, PtrVT));
4621     }
4622     MemOpChains.push_back(
4623         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4624     // Calculate and remember argument location.
4625   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4626                                   TailCallArguments);
4627 }
4628 
/// PrepareTailCall - Store tail-call arguments into their final stack slots,
/// save the return address (and, in the code path guarded by the callee, the
/// frame pointer) into the adjusted slots, and close the call sequence just
/// before the tail-call node is emitted.
static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  // Merge all the argument stores into a single token so they all complete
  // before the call.
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}
4652 
4653 // Is this global address that of a function that can be called by name? (as
4654 // opposed to something that must hold a descriptor for an indirect call).
4655 static bool isFunctionGlobalAddress(SDValue Callee) {
4656   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4657     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4658         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4659       return false;
4660 
4661     return G->getGlobal()->getValueType()->isFunctionTy();
4662   }
4663 
4664   return false;
4665 }
4666 
/// PrepareCall - Lower the Callee operand for a call, choosing between a
/// direct call (PPCISD::CALL) and an indirect call through CTR
/// (PPCISD::BCTRL), and assemble the operand list (Ops) and result types
/// (NodeTys) for the final call node. Returns the call opcode to use.
static unsigned
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
            bool isPatchPoint, bool hasNest,
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
            ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  // Assume an indirect call unless one of the direct-call forms below
  // matches.
  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  // Determine whether the callee can be assumed local to this module; on
  // 32-bit ELF, calls to non-local symbols are marked to go through the PLT.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
  const GlobalValue *GV = nullptr;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
    GV = G->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;

  if (isFunctionGlobalAddress(Callee)) {
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
    // A call to a TLS address is actually an indirect call to a
    // thread-specific pointer.
    unsigned OpFlags = 0;
    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
    // every direct call is) turn it into a TargetGlobalAddress /
    // TargetExternalSymbol node so that legalize doesn't hack it.
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                        Callee.getValueType(), 0, OpFlags);
    needIndirectCall = false;
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (isPatchPoint) {
    // We'll form an invalid direct call when lowering a patchpoint; the full
    // sequence for an indirect call is complicated, and many of the
    // instructions introduced might have side effects (and, thus, can't be
    // removed later). The call itself will be removed as soon as the
    // argument/return lowering is complete, so the fact that it has the wrong
    // kind of operands should not really matter.
    needIndirectCall = false;
  }

  if (needIndirectCall) {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // The loads are scheduled at the beginning of the call sequence, and the
      // register copies are flagged together to ensure that no other
      // operations can be scheduled in between. E.g. without flagging the
      // copies together, a TOC access in the caller could be scheduled between
      // the assignment of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.

      // Load the address of the function entry point from the function
      // descriptor.
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
      if (LDChain.getValueType() == MVT::Glue)
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);

      // If function descriptors are known invariant, the loads may be marked
      // dereferenceable and invariant so they can be hoisted/combined.
      auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                          ? (MachineMemOperand::MODereferenceable |
                             MachineMemOperand::MOInvariant)
                          : MachineMemOperand::MONone;

      MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
                                        /* Alignment = */ 8, MMOFlags);

      // Load environment pointer into r11.
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
                      /* Alignment = */ 8, MMOFlags);

      // Load the callee TOC base (second doubleword of the descriptor).
      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue TOCPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
                      /* Alignment = */ 8, MMOFlags);

      setUsesTOCBasePtr(DAG);
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
                                        InFlag);
      Chain = TOCVal.getValue(0);
      InFlag = TOCVal.getValue(1);

      // If the function call has an explicit 'nest' parameter, it takes the
      // place of the environment pointer.
      if (!hasNest) {
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                          InFlag);

        Chain = EnvVal.getValue(0);
        InFlag = EnvVal.getValue(1);
      }

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    // MTCTR takes the glue operand only when one is live.
    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    // The callee operand is consumed by MTCTR; clear it so the direct-call
    // handling below is skipped.
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer)
    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  if (isSVR4ABI && isPPC64 && !isPatchPoint) {
    setUsesTOCBasePtr(DAG);
    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
  }

  return CallOpc;
}
4864 
4865 SDValue PPCTargetLowering::LowerCallResult(
4866     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4867     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4868     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4869   SmallVector<CCValAssign, 16> RVLocs;
4870   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4871                     *DAG.getContext());
4872   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4873 
4874   // Copy all of the result registers out of their specified physreg.
4875   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4876     CCValAssign &VA = RVLocs[i];
4877     assert(VA.isRegLoc() && "Can only return in registers!");
4878 
4879     SDValue Val = DAG.getCopyFromReg(Chain, dl,
4880                                      VA.getLocReg(), VA.getLocVT(), InFlag);
4881     Chain = Val.getValue(1);
4882     InFlag = Val.getValue(2);
4883 
4884     switch (VA.getLocInfo()) {
4885     default: llvm_unreachable("Unknown loc info!");
4886     case CCValAssign::Full: break;
4887     case CCValAssign::AExt:
4888       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4889       break;
4890     case CCValAssign::ZExt:
4891       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4892                         DAG.getValueType(VA.getValVT()));
4893       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4894       break;
4895     case CCValAssign::SExt:
4896       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4897                         DAG.getValueType(VA.getValVT()));
4898       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4899       break;
4900     }
4901 
4902     InVals.push_back(Val);
4903   }
4904 
4905   return Chain;
4906 }
4907 
// FinishCall - Emit the parts of call lowering shared by all PPC ABIs: pick
// the final call opcode and operand list, emit either a tail-call return or
// a normal call node (including the 64-bit SVR4 TOC save/restore handling),
// close the call sequence with CALLSEQ_END, and lower the call results into
// InVals.  Returns the updated chain.
SDValue PPCTargetLowering::FinishCall(
    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  // PrepareCall selects the call opcode (e.g. CALL vs. BCTRL) and fills in
  // the operand list (chain, callee, argument registers, ...) and the result
  // value types.
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
                                 SPDiff, isTailCall, isPatchPoint, hasNest,
                                 RegsToPass, Ops, NodeTys, CS, Subtarget);

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CallConv == CallingConv::Fast &&
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // The glue (if any) must be the last operand of the call node.
  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
    "Expecting an global address, external symbol, absolute value or register");

    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
    // TC_RETURN replaces both the call and the function's own return, so no
    // CALLSEQ_END or result lowering is needed on this path.
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
  }

  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged.

  MachineFunction &MF = DAG.getMachineFunction();
  if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
      !isPatchPoint) {
    if (CallOpc == PPCISD::BCTRL) {
      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      CallOpc = PPCISD::BCTRL_LOAD_TOC;

      // Compute the address of the TOC save slot: X1 (stack pointer) plus
      // the ABI-defined TOC save offset.
      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);

      // The address needs to go after the chain input but before the flag (or
      // any other variadic arguments).
      Ops.insert(std::next(Ops.begin()), AddTOC);
    } else if (CallOpc == PPCISD::CALL &&
      !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
    }
  }

  // Build the call node; its glue result feeds CALLSEQ_END.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             InFlag, dl);
  // Only thread glue into the result copies if the call actually returns
  // values.
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}
5007 
5008 SDValue
5009 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5010                              SmallVectorImpl<SDValue> &InVals) const {
5011   SelectionDAG &DAG                     = CLI.DAG;
5012   SDLoc &dl                             = CLI.DL;
5013   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5014   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5015   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5016   SDValue Chain                         = CLI.Chain;
5017   SDValue Callee                        = CLI.Callee;
5018   bool &isTailCall                      = CLI.IsTailCall;
5019   CallingConv::ID CallConv              = CLI.CallConv;
5020   bool isVarArg                         = CLI.IsVarArg;
5021   bool isPatchPoint                     = CLI.IsPatchPoint;
5022   ImmutableCallSite *CS                 = CLI.CS;
5023 
5024   if (isTailCall) {
5025     if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall()))
5026       isTailCall = false;
5027     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5028       isTailCall =
5029         IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5030                                                  isVarArg, Outs, Ins, DAG);
5031     else
5032       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5033                                                      Ins, DAG);
5034     if (isTailCall) {
5035       ++NumTailCalls;
5036       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5037         ++NumSiblingCalls;
5038 
5039       assert(isa<GlobalAddressSDNode>(Callee) &&
5040              "Callee should be an llvm::Function object.");
5041       DEBUG(
5042         const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5043         const unsigned Width = 80 - strlen("TCO caller: ")
5044                                   - strlen(", callee linkage: 0, 0");
5045         dbgs() << "TCO caller: "
5046                << left_justify(DAG.getMachineFunction().getName(), Width)
5047                << ", callee linkage: "
5048                << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
5049       );
5050     }
5051   }
5052 
5053   if (!isTailCall && CS && CS->isMustTailCall())
5054     report_fatal_error("failed to perform tail call elimination on a call "
5055                        "site marked musttail");
5056 
5057   // When long calls (i.e. indirect calls) are always used, calls are always
5058   // made via function pointer. If we have a function name, first translate it
5059   // into a pointer.
5060   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5061       !isTailCall)
5062     Callee = LowerGlobalAddress(Callee, DAG);
5063 
5064   if (Subtarget.isSVR4ABI()) {
5065     if (Subtarget.isPPC64())
5066       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5067                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
5068                               dl, DAG, InVals, CS);
5069     else
5070       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5071                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
5072                               dl, DAG, InVals, CS);
5073   }
5074 
5075   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5076                           isTailCall, isPatchPoint, Outs, OutVals, Ins,
5077                           dl, DAG, InVals, CS);
5078 }
5079 
5080 SDValue PPCTargetLowering::LowerCall_32SVR4(
5081     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5082     bool isTailCall, bool isPatchPoint,
5083     const SmallVectorImpl<ISD::OutputArg> &Outs,
5084     const SmallVectorImpl<SDValue> &OutVals,
5085     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5086     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5087     ImmutableCallSite *CS) const {
5088   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5089   // of the 32-bit SVR4 ABI stack frame layout.
5090 
5091   assert((CallConv == CallingConv::C ||
5092           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5093 
5094   unsigned PtrByteSize = 4;
5095 
5096   MachineFunction &MF = DAG.getMachineFunction();
5097 
5098   // Mark this function as potentially containing a function that contains a
5099   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5100   // and restoring the callers stack pointer in this functions epilog. This is
5101   // done because by tail calling the called function might overwrite the value
5102   // in this function's (MF) stack pointer stack slot 0(SP).
5103   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5104       CallConv == CallingConv::Fast)
5105     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5106 
5107   // Count how many bytes are to be pushed on the stack, including the linkage
5108   // area, parameter list area and the part of the local variable space which
5109   // contains copies of aggregates which are passed by value.
5110 
5111   // Assign locations to all of the outgoing arguments.
5112   SmallVector<CCValAssign, 16> ArgLocs;
5113   PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5114 
5115   // Reserve space for the linkage area on the stack.
5116   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5117                        PtrByteSize);
5118   if (useSoftFloat())
5119     CCInfo.PreAnalyzeCallOperands(Outs);
5120 
5121   if (isVarArg) {
5122     // Handle fixed and variable vector arguments differently.
5123     // Fixed vector arguments go into registers as long as registers are
5124     // available. Variable vector arguments always go into memory.
5125     unsigned NumArgs = Outs.size();
5126 
5127     for (unsigned i = 0; i != NumArgs; ++i) {
5128       MVT ArgVT = Outs[i].VT;
5129       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5130       bool Result;
5131 
5132       if (Outs[i].IsFixed) {
5133         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5134                                CCInfo);
5135       } else {
5136         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5137                                       ArgFlags, CCInfo);
5138       }
5139 
5140       if (Result) {
5141 #ifndef NDEBUG
5142         errs() << "Call operand #" << i << " has unhandled type "
5143              << EVT(ArgVT).getEVTString() << "\n";
5144 #endif
5145         llvm_unreachable(nullptr);
5146       }
5147     }
5148   } else {
5149     // All arguments are treated the same.
5150     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5151   }
5152   CCInfo.clearWasPPCF128();
5153 
5154   // Assign locations to all of the outgoing aggregate by value arguments.
5155   SmallVector<CCValAssign, 16> ByValArgLocs;
5156   CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5157 
5158   // Reserve stack space for the allocations in CCInfo.
5159   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5160 
5161   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5162 
5163   // Size of the linkage area, parameter list area and the part of the local
5164   // space variable where copies of aggregates which are passed by value are
5165   // stored.
5166   unsigned NumBytes = CCByValInfo.getNextStackOffset();
5167 
5168   // Calculate by how many bytes the stack has to be adjusted in case of tail
5169   // call optimization.
5170   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5171 
5172   // Adjust the stack pointer for the new arguments...
5173   // These operations are automatically eliminated by the prolog/epilog pass
5174   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5175   SDValue CallSeqStart = Chain;
5176 
5177   // Load the return address and frame pointer so it can be moved somewhere else
5178   // later.
5179   SDValue LROp, FPOp;
5180   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5181 
5182   // Set up a copy of the stack pointer for use loading and storing any
5183   // arguments that may not fit in the registers available for argument
5184   // passing.
5185   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5186 
5187   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5188   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5189   SmallVector<SDValue, 8> MemOpChains;
5190 
5191   bool seenFloatArg = false;
5192   // Walk the register/memloc assignments, inserting copies/loads.
5193   for (unsigned i = 0, j = 0, e = ArgLocs.size();
5194        i != e;
5195        ++i) {
5196     CCValAssign &VA = ArgLocs[i];
5197     SDValue Arg = OutVals[i];
5198     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5199 
5200     if (Flags.isByVal()) {
5201       // Argument is an aggregate which is passed by value, thus we need to
5202       // create a copy of it in the local variable space of the current stack
5203       // frame (which is the stack frame of the caller) and pass the address of
5204       // this copy to the callee.
5205       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5206       CCValAssign &ByValVA = ByValArgLocs[j++];
5207       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5208 
5209       // Memory reserved in the local variable space of the callers stack frame.
5210       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5211 
5212       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5213       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5214                            StackPtr, PtrOff);
5215 
5216       // Create a copy of the argument in the local area of the current
5217       // stack frame.
5218       SDValue MemcpyCall =
5219         CreateCopyOfByValArgument(Arg, PtrOff,
5220                                   CallSeqStart.getNode()->getOperand(0),
5221                                   Flags, DAG, dl);
5222 
5223       // This must go outside the CALLSEQ_START..END.
5224       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5225                                                      SDLoc(MemcpyCall));
5226       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5227                              NewCallSeqStart.getNode());
5228       Chain = CallSeqStart = NewCallSeqStart;
5229 
5230       // Pass the address of the aggregate copy on the stack either in a
5231       // physical register or in the parameter list area of the current stack
5232       // frame to the callee.
5233       Arg = PtrOff;
5234     }
5235 
5236     if (VA.isRegLoc()) {
5237       if (Arg.getValueType() == MVT::i1)
5238         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
5239 
5240       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5241       // Put argument in a physical register.
5242       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5243     } else {
5244       // Put argument in the parameter list area of the current stack frame.
5245       assert(VA.isMemLoc());
5246       unsigned LocMemOffset = VA.getLocMemOffset();
5247 
5248       if (!isTailCall) {
5249         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5250         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5251                              StackPtr, PtrOff);
5252 
5253         MemOpChains.push_back(
5254             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5255       } else {
5256         // Calculate and remember argument location.
5257         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5258                                  TailCallArguments);
5259       }
5260     }
5261   }
5262 
5263   if (!MemOpChains.empty())
5264     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5265 
5266   // Build a sequence of copy-to-reg nodes chained together with token chain
5267   // and flag operands which copy the outgoing args into the appropriate regs.
5268   SDValue InFlag;
5269   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5270     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5271                              RegsToPass[i].second, InFlag);
5272     InFlag = Chain.getValue(1);
5273   }
5274 
5275   // Set CR bit 6 to true if this is a vararg call with floating args passed in
5276   // registers.
5277   if (isVarArg) {
5278     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5279     SDValue Ops[] = { Chain, InFlag };
5280 
5281     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5282                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5283 
5284     InFlag = Chain.getValue(1);
5285   }
5286 
5287   if (isTailCall)
5288     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5289                     TailCallArguments);
5290 
5291   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5292                     /* unused except on PPC64 ELFv1 */ false, DAG,
5293                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5294                     NumBytes, Ins, InVals, CS);
5295 }
5296 
5297 // Copy an argument into memory, being careful to do this outside the
5298 // call sequence for the call to which the argument belongs.
5299 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5300     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5301     SelectionDAG &DAG, const SDLoc &dl) const {
5302   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5303                         CallSeqStart.getNode()->getOperand(0),
5304                         Flags, DAG, dl);
5305   // The MEMCPY must go outside the CALLSEQ_START..END.
5306   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5307   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5308                                                  SDLoc(MemcpyCall));
5309   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5310                          NewCallSeqStart.getNode());
5311   return NewCallSeqStart;
5312 }
5313 
5314 SDValue PPCTargetLowering::LowerCall_64SVR4(
5315     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5316     bool isTailCall, bool isPatchPoint,
5317     const SmallVectorImpl<ISD::OutputArg> &Outs,
5318     const SmallVectorImpl<SDValue> &OutVals,
5319     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5320     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5321     ImmutableCallSite *CS) const {
5322   bool isELFv2ABI = Subtarget.isELFv2ABI();
5323   bool isLittleEndian = Subtarget.isLittleEndian();
5324   unsigned NumOps = Outs.size();
5325   bool hasNest = false;
5326   bool IsSibCall = false;
5327 
5328   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5329   unsigned PtrByteSize = 8;
5330 
5331   MachineFunction &MF = DAG.getMachineFunction();
5332 
5333   if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5334     IsSibCall = true;
5335 
5336   // Mark this function as potentially containing a function that contains a
5337   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5338   // and restoring the callers stack pointer in this functions epilog. This is
5339   // done because by tail calling the called function might overwrite the value
5340   // in this function's (MF) stack pointer stack slot 0(SP).
5341   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5342       CallConv == CallingConv::Fast)
5343     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5344 
5345   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5346          "fastcc not supported on varargs functions");
5347 
5348   // Count how many bytes are to be pushed on the stack, including the linkage
5349   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5350   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5351   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5352   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5353   unsigned NumBytes = LinkageSize;
5354   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5355   unsigned &QFPR_idx = FPR_idx;
5356 
5357   static const MCPhysReg GPR[] = {
5358     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5359     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5360   };
5361   static const MCPhysReg VR[] = {
5362     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5363     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5364   };
5365 
5366   const unsigned NumGPRs = array_lengthof(GPR);
5367   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5368   const unsigned NumVRs  = array_lengthof(VR);
5369   const unsigned NumQFPRs = NumFPRs;
5370 
5371   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5372   // can be passed to the callee in registers.
5373   // For the fast calling convention, there is another check below.
5374   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5375   bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5376   if (!HasParameterArea) {
5377     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5378     unsigned AvailableFPRs = NumFPRs;
5379     unsigned AvailableVRs = NumVRs;
5380     unsigned NumBytesTmp = NumBytes;
5381     for (unsigned i = 0; i != NumOps; ++i) {
5382       if (Outs[i].Flags.isNest()) continue;
5383       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5384                                 PtrByteSize, LinkageSize, ParamAreaSize,
5385                                 NumBytesTmp, AvailableFPRs, AvailableVRs,
5386                                 Subtarget.hasQPX()))
5387         HasParameterArea = true;
5388     }
5389   }
5390 
5391   // When using the fast calling convention, we don't provide backing for
5392   // arguments that will be in registers.
5393   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5394 
5395   // Add up all the space actually used.
5396   for (unsigned i = 0; i != NumOps; ++i) {
5397     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5398     EVT ArgVT = Outs[i].VT;
5399     EVT OrigVT = Outs[i].ArgVT;
5400 
5401     if (Flags.isNest())
5402       continue;
5403 
5404     if (CallConv == CallingConv::Fast) {
5405       if (Flags.isByVal())
5406         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5407       else
5408         switch (ArgVT.getSimpleVT().SimpleTy) {
5409         default: llvm_unreachable("Unexpected ValueType for argument!");
5410         case MVT::i1:
5411         case MVT::i32:
5412         case MVT::i64:
5413           if (++NumGPRsUsed <= NumGPRs)
5414             continue;
5415           break;
5416         case MVT::v4i32:
5417         case MVT::v8i16:
5418         case MVT::v16i8:
5419         case MVT::v2f64:
5420         case MVT::v2i64:
5421         case MVT::v1i128:
5422           if (++NumVRsUsed <= NumVRs)
5423             continue;
5424           break;
5425         case MVT::v4f32:
5426           // When using QPX, this is handled like a FP register, otherwise, it
5427           // is an Altivec register.
5428           if (Subtarget.hasQPX()) {
5429             if (++NumFPRsUsed <= NumFPRs)
5430               continue;
5431           } else {
5432             if (++NumVRsUsed <= NumVRs)
5433               continue;
5434           }
5435           break;
5436         case MVT::f32:
5437         case MVT::f64:
5438         case MVT::v4f64: // QPX
5439         case MVT::v4i1:  // QPX
5440           if (++NumFPRsUsed <= NumFPRs)
5441             continue;
5442           break;
5443         }
5444     }
5445 
5446     /* Respect alignment of argument on the stack.  */
5447     unsigned Align =
5448       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5449     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5450 
5451     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5452     if (Flags.isInConsecutiveRegsLast())
5453       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5454   }
5455 
5456   unsigned NumBytesActuallyUsed = NumBytes;
5457 
5458   // In the old ELFv1 ABI,
5459   // the prolog code of the callee may store up to 8 GPR argument registers to
5460   // the stack, allowing va_start to index over them in memory if its varargs.
5461   // Because we cannot tell if this is needed on the caller side, we have to
5462   // conservatively assume that it is needed.  As such, make sure we have at
5463   // least enough stack space for the caller to store the 8 GPRs.
5464   // In the ELFv2 ABI, we allocate the parameter area iff a callee
5465   // really requires memory operands, e.g. a vararg function.
5466   if (HasParameterArea)
5467     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5468   else
5469     NumBytes = LinkageSize;
5470 
5471   // Tail call needs the stack to be aligned.
5472   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5473       CallConv == CallingConv::Fast)
5474     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5475 
5476   int SPDiff = 0;
5477 
5478   // Calculate by how many bytes the stack has to be adjusted in case of tail
5479   // call optimization.
5480   if (!IsSibCall)
5481     SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5482 
5483   // To protect arguments on the stack from being clobbered in a tail call,
5484   // force all the loads to happen before doing any other lowering.
5485   if (isTailCall)
5486     Chain = DAG.getStackArgumentTokenFactor(Chain);
5487 
5488   // Adjust the stack pointer for the new arguments...
5489   // These operations are automatically eliminated by the prolog/epilog pass
5490   if (!IsSibCall)
5491     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5492   SDValue CallSeqStart = Chain;
5493 
5494   // Load the return address and frame pointer so it can be move somewhere else
5495   // later.
5496   SDValue LROp, FPOp;
5497   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5498 
5499   // Set up a copy of the stack pointer for use loading and storing any
5500   // arguments that may not fit in the registers available for argument
5501   // passing.
5502   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5503 
5504   // Figure out which arguments are going to go in registers, and which in
5505   // memory.  Also, if this is a vararg function, floating point operations
5506   // must be stored to our stack, and loaded into integer regs as well, if
5507   // any integer regs are available for argument passing.
5508   unsigned ArgOffset = LinkageSize;
5509 
5510   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5511   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5512 
5513   SmallVector<SDValue, 8> MemOpChains;
5514   for (unsigned i = 0; i != NumOps; ++i) {
5515     SDValue Arg = OutVals[i];
5516     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5517     EVT ArgVT = Outs[i].VT;
5518     EVT OrigVT = Outs[i].ArgVT;
5519 
5520     // PtrOff will be used to store the current argument to the stack if a
5521     // register cannot be found for it.
5522     SDValue PtrOff;
5523 
5524     // We re-align the argument offset for each argument, except when using the
5525     // fast calling convention, when we need to make sure we do that only when
5526     // we'll actually use a stack slot.
5527     auto ComputePtrOff = [&]() {
5528       /* Respect alignment of argument on the stack.  */
5529       unsigned Align =
5530         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5531       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5532 
5533       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5534 
5535       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5536     };
5537 
5538     if (CallConv != CallingConv::Fast) {
5539       ComputePtrOff();
5540 
5541       /* Compute GPR index associated with argument offset.  */
5542       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5543       GPR_idx = std::min(GPR_idx, NumGPRs);
5544     }
5545 
5546     // Promote integers to 64-bit values.
5547     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5548       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5549       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5550       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5551     }
5552 
5553     // FIXME memcpy is used way more than necessary.  Correctness first.
5554     // Note: "by value" is code for passing a structure by value, not
5555     // basic types.
5556     if (Flags.isByVal()) {
5557       // Note: Size includes alignment padding, so
5558       //   struct x { short a; char b; }
5559       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5560       // These are the proper values we need for right-justifying the
5561       // aggregate in a parameter register.
5562       unsigned Size = Flags.getByValSize();
5563 
5564       // An empty aggregate parameter takes up no storage and no
5565       // registers.
5566       if (Size == 0)
5567         continue;
5568 
5569       if (CallConv == CallingConv::Fast)
5570         ComputePtrOff();
5571 
5572       // All aggregates smaller than 8 bytes must be passed right-justified.
5573       if (Size==1 || Size==2 || Size==4) {
5574         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5575         if (GPR_idx != NumGPRs) {
5576           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5577                                         MachinePointerInfo(), VT);
5578           MemOpChains.push_back(Load.getValue(1));
5579           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5580 
5581           ArgOffset += PtrByteSize;
5582           continue;
5583         }
5584       }
5585 
5586       if (GPR_idx == NumGPRs && Size < 8) {
5587         SDValue AddPtr = PtrOff;
5588         if (!isLittleEndian) {
5589           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5590                                           PtrOff.getValueType());
5591           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5592         }
5593         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5594                                                           CallSeqStart,
5595                                                           Flags, DAG, dl);
5596         ArgOffset += PtrByteSize;
5597         continue;
5598       }
5599       // Copy entire object into memory.  There are cases where gcc-generated
5600       // code assumes it is there, even if it could be put entirely into
5601       // registers.  (This is not what the doc says.)
5602 
5603       // FIXME: The above statement is likely due to a misunderstanding of the
5604       // documents.  All arguments must be copied into the parameter area BY
5605       // THE CALLEE in the event that the callee takes the address of any
5606       // formal argument.  That has not yet been implemented.  However, it is
5607       // reasonable to use the stack area as a staging area for the register
5608       // load.
5609 
5610       // Skip this for small aggregates, as we will use the same slot for a
5611       // right-justified copy, below.
5612       if (Size >= 8)
5613         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5614                                                           CallSeqStart,
5615                                                           Flags, DAG, dl);
5616 
5617       // When a register is available, pass a small aggregate right-justified.
5618       if (Size < 8 && GPR_idx != NumGPRs) {
5619         // The easiest way to get this right-justified in a register
5620         // is to copy the structure into the rightmost portion of a
5621         // local variable slot, then load the whole slot into the
5622         // register.
5623         // FIXME: The memcpy seems to produce pretty awful code for
5624         // small aggregates, particularly for packed ones.
5625         // FIXME: It would be preferable to use the slot in the
5626         // parameter save area instead of a new local variable.
5627         SDValue AddPtr = PtrOff;
5628         if (!isLittleEndian) {
5629           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5630           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5631         }
5632         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5633                                                           CallSeqStart,
5634                                                           Flags, DAG, dl);
5635 
5636         // Load the slot into the register.
5637         SDValue Load =
5638             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5639         MemOpChains.push_back(Load.getValue(1));
5640         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5641 
5642         // Done with this argument.
5643         ArgOffset += PtrByteSize;
5644         continue;
5645       }
5646 
5647       // For aggregates larger than PtrByteSize, copy the pieces of the
5648       // object that fit into registers from the parameter save area.
5649       for (unsigned j=0; j<Size; j+=PtrByteSize) {
5650         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5651         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5652         if (GPR_idx != NumGPRs) {
5653           SDValue Load =
5654               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5655           MemOpChains.push_back(Load.getValue(1));
5656           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5657           ArgOffset += PtrByteSize;
5658         } else {
5659           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5660           break;
5661         }
5662       }
5663       continue;
5664     }
5665 
5666     switch (Arg.getSimpleValueType().SimpleTy) {
5667     default: llvm_unreachable("Unexpected ValueType for argument!");
5668     case MVT::i1:
5669     case MVT::i32:
5670     case MVT::i64:
5671       if (Flags.isNest()) {
5672         // The 'nest' parameter, if any, is passed in R11.
5673         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5674         hasNest = true;
5675         break;
5676       }
5677 
5678       // These can be scalar arguments or elements of an integer array type
5679       // passed directly.  Clang may use those instead of "byval" aggregate
5680       // types to avoid forcing arguments to memory unnecessarily.
5681       if (GPR_idx != NumGPRs) {
5682         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5683       } else {
5684         if (CallConv == CallingConv::Fast)
5685           ComputePtrOff();
5686 
5687         assert(HasParameterArea &&
5688                "Parameter area must exist to pass an argument in memory.");
5689         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5690                          true, isTailCall, false, MemOpChains,
5691                          TailCallArguments, dl);
5692         if (CallConv == CallingConv::Fast)
5693           ArgOffset += PtrByteSize;
5694       }
5695       if (CallConv != CallingConv::Fast)
5696         ArgOffset += PtrByteSize;
5697       break;
5698     case MVT::f32:
5699     case MVT::f64: {
5700       // These can be scalar arguments or elements of a float array type
5701       // passed directly.  The latter are used to implement ELFv2 homogenous
5702       // float aggregates.
5703 
5704       // Named arguments go into FPRs first, and once they overflow, the
5705       // remaining arguments go into GPRs and then the parameter save area.
5706       // Unnamed arguments for vararg functions always go to GPRs and
5707       // then the parameter save area.  For now, put all arguments to vararg
5708       // routines always in both locations (FPR *and* GPR or stack slot).
5709       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5710       bool NeededLoad = false;
5711 
5712       // First load the argument into the next available FPR.
5713       if (FPR_idx != NumFPRs)
5714         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5715 
5716       // Next, load the argument into GPR or stack slot if needed.
5717       if (!NeedGPROrStack)
5718         ;
5719       else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5720         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5721         // once we support fp <-> gpr moves.
5722 
5723         // In the non-vararg case, this can only ever happen in the
5724         // presence of f32 array types, since otherwise we never run
5725         // out of FPRs before running out of GPRs.
5726         SDValue ArgVal;
5727 
5728         // Double values are always passed in a single GPR.
5729         if (Arg.getValueType() != MVT::f32) {
5730           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5731 
5732         // Non-array float values are extended and passed in a GPR.
5733         } else if (!Flags.isInConsecutiveRegs()) {
5734           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5735           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5736 
5737         // If we have an array of floats, we collect every odd element
5738         // together with its predecessor into one GPR.
5739         } else if (ArgOffset % PtrByteSize != 0) {
5740           SDValue Lo, Hi;
5741           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5742           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5743           if (!isLittleEndian)
5744             std::swap(Lo, Hi);
5745           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5746 
5747         // The final element, if even, goes into the first half of a GPR.
5748         } else if (Flags.isInConsecutiveRegsLast()) {
5749           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5750           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5751           if (!isLittleEndian)
5752             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5753                                  DAG.getConstant(32, dl, MVT::i32));
5754 
5755         // Non-final even elements are skipped; they will be handled
5756         // together the with subsequent argument on the next go-around.
5757         } else
5758           ArgVal = SDValue();
5759 
5760         if (ArgVal.getNode())
5761           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5762       } else {
5763         if (CallConv == CallingConv::Fast)
5764           ComputePtrOff();
5765 
5766         // Single-precision floating-point values are mapped to the
5767         // second (rightmost) word of the stack doubleword.
5768         if (Arg.getValueType() == MVT::f32 &&
5769             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5770           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5771           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5772         }
5773 
5774         assert(HasParameterArea &&
5775                "Parameter area must exist to pass an argument in memory.");
5776         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5777                          true, isTailCall, false, MemOpChains,
5778                          TailCallArguments, dl);
5779 
5780         NeededLoad = true;
5781       }
5782       // When passing an array of floats, the array occupies consecutive
5783       // space in the argument area; only round up to the next doubleword
5784       // at the end of the array.  Otherwise, each float takes 8 bytes.
5785       if (CallConv != CallingConv::Fast || NeededLoad) {
5786         ArgOffset += (Arg.getValueType() == MVT::f32 &&
5787                       Flags.isInConsecutiveRegs()) ? 4 : 8;
5788         if (Flags.isInConsecutiveRegsLast())
5789           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5790       }
5791       break;
5792     }
5793     case MVT::v4f32:
5794     case MVT::v4i32:
5795     case MVT::v8i16:
5796     case MVT::v16i8:
5797     case MVT::v2f64:
5798     case MVT::v2i64:
5799     case MVT::v1i128:
5800       if (!Subtarget.hasQPX()) {
5801       // These can be scalar arguments or elements of a vector array type
5802       // passed directly.  The latter are used to implement ELFv2 homogenous
5803       // vector aggregates.
5804 
5805       // For a varargs call, named arguments go into VRs or on the stack as
5806       // usual; unnamed arguments always go to the stack or the corresponding
5807       // GPRs when within range.  For now, we always put the value in both
5808       // locations (or even all three).
5809       if (isVarArg) {
5810         assert(HasParameterArea &&
5811                "Parameter area must exist if we have a varargs call.");
5812         // We could elide this store in the case where the object fits
5813         // entirely in R registers.  Maybe later.
5814         SDValue Store =
5815             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5816         MemOpChains.push_back(Store);
5817         if (VR_idx != NumVRs) {
5818           SDValue Load =
5819               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5820           MemOpChains.push_back(Load.getValue(1));
5821           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5822         }
5823         ArgOffset += 16;
5824         for (unsigned i=0; i<16; i+=PtrByteSize) {
5825           if (GPR_idx == NumGPRs)
5826             break;
5827           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5828                                    DAG.getConstant(i, dl, PtrVT));
5829           SDValue Load =
5830               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5831           MemOpChains.push_back(Load.getValue(1));
5832           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5833         }
5834         break;
5835       }
5836 
5837       // Non-varargs Altivec params go into VRs or on the stack.
5838       if (VR_idx != NumVRs) {
5839         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5840       } else {
5841         if (CallConv == CallingConv::Fast)
5842           ComputePtrOff();
5843 
5844         assert(HasParameterArea &&
5845                "Parameter area must exist to pass an argument in memory.");
5846         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5847                          true, isTailCall, true, MemOpChains,
5848                          TailCallArguments, dl);
5849         if (CallConv == CallingConv::Fast)
5850           ArgOffset += 16;
5851       }
5852 
5853       if (CallConv != CallingConv::Fast)
5854         ArgOffset += 16;
5855       break;
5856       } // not QPX
5857 
5858       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5859              "Invalid QPX parameter type");
5860 
5861       /* fall through */
5862     case MVT::v4f64:
5863     case MVT::v4i1: {
5864       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5865       if (isVarArg) {
5866         assert(HasParameterArea &&
5867                "Parameter area must exist if we have a varargs call.");
5868         // We could elide this store in the case where the object fits
5869         // entirely in R registers.  Maybe later.
5870         SDValue Store =
5871             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5872         MemOpChains.push_back(Store);
5873         if (QFPR_idx != NumQFPRs) {
5874           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
5875                                      PtrOff, MachinePointerInfo());
5876           MemOpChains.push_back(Load.getValue(1));
5877           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5878         }
5879         ArgOffset += (IsF32 ? 16 : 32);
5880         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5881           if (GPR_idx == NumGPRs)
5882             break;
5883           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5884                                    DAG.getConstant(i, dl, PtrVT));
5885           SDValue Load =
5886               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5887           MemOpChains.push_back(Load.getValue(1));
5888           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5889         }
5890         break;
5891       }
5892 
5893       // Non-varargs QPX params go into registers or on the stack.
5894       if (QFPR_idx != NumQFPRs) {
5895         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5896       } else {
5897         if (CallConv == CallingConv::Fast)
5898           ComputePtrOff();
5899 
5900         assert(HasParameterArea &&
5901                "Parameter area must exist to pass an argument in memory.");
5902         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5903                          true, isTailCall, true, MemOpChains,
5904                          TailCallArguments, dl);
5905         if (CallConv == CallingConv::Fast)
5906           ArgOffset += (IsF32 ? 16 : 32);
5907       }
5908 
5909       if (CallConv != CallingConv::Fast)
5910         ArgOffset += (IsF32 ? 16 : 32);
5911       break;
5912       }
5913     }
5914   }
5915 
5916   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
5917          "mismatch in size of parameter area");
5918   (void)NumBytesActuallyUsed;
5919 
5920   if (!MemOpChains.empty())
5921     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5922 
5923   // Check if this is an indirect call (MTCTR/BCTRL).
5924   // See PrepareCall() for more information about calls through function
5925   // pointers in the 64-bit SVR4 ABI.
5926   if (!isTailCall && !isPatchPoint &&
5927       !isFunctionGlobalAddress(Callee) &&
5928       !isa<ExternalSymbolSDNode>(Callee)) {
5929     // Load r2 into a virtual register and store it to the TOC save area.
5930     setUsesTOCBasePtr(DAG);
5931     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5932     // TOC save area offset.
5933     unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5934     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5935     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5936     Chain = DAG.getStore(
5937         Val.getValue(1), dl, Val, AddPtr,
5938         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
5939     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5940     // This does not mean the MTCTR instruction must use R12; it's easier
5941     // to model this as an extra parameter, so do that.
5942     if (isELFv2ABI && !isPatchPoint)
5943       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5944   }
5945 
5946   // Build a sequence of copy-to-reg nodes chained together with token chain
5947   // and flag operands which copy the outgoing args into the appropriate regs.
5948   SDValue InFlag;
5949   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5950     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5951                              RegsToPass[i].second, InFlag);
5952     InFlag = Chain.getValue(1);
5953   }
5954 
5955   if (isTailCall && !IsSibCall)
5956     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5957                     TailCallArguments);
5958 
5959   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
5960                     DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
5961                     SPDiff, NumBytes, Ins, InVals, CS);
5962 }
5963 
/// Lower an outgoing call according to the Darwin PowerPC ABI (handles both
/// the 32-bit and 64-bit cases, selected via the pointer type).  Arguments are
/// assigned to GPRs/FPRs/VRs in order; anything that does not fit in registers
/// is stored to the parameter save area on the stack.  Returns the chain/call
/// node produced by FinishCall.
SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  // Number of outgoing argument values to lower.
  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Size of a stack slot / GPR in bytes: 8 on ppc64, 4 on ppc32.
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    // Align the trailing Altivec block to 16 bytes, then reserve one full
    // 16-byte slot per deferred vector parameter.
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be move somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  // Select the GPR bank matching the pointer width.
  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          // Load the 1- or 2-byte aggregate into the low bits of a GPR via an
          // extending load.
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          // No GPR left: memcpy the object into the rightmost bytes of its
          // stack slot (right-justified).
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the remainder of the object stays in memory; account
          // for its rounded-up size and stop.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        // i1 values must be widened to the pointer type before going into a
        // register.
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      // Integer arguments always consume one pointer-sized slot, whether
      // passed in a register or in memory.
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          // For varargs, the value is stored to the stack and also reloaded
          // into the shadowing GPR(s), since the callee may access it either
          // way.
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          // On 32-bit targets an f64 occupies two GPRs; reload the second
          // word into the next GPR as well.
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      // FP arguments take 8 bytes on ppc64; on ppc32, f32 takes 4 and f64
      // takes 8.
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 !=0) {
          // Pad to 16-byte alignment; padding also consumes the shadow GPRs.
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Also shadow the vector in GPRs, one pointer-sized piece at a time,
        // while GPRs remain.
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers emit
  // the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        // Only vector params beyond the register-passed ones (first NumVRs)
        // are stored.
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  // Merge all argument stores/loads into a single token so the call depends
  // on every memory operation.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}
6345 
6346 bool
6347 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6348                                   MachineFunction &MF, bool isVarArg,
6349                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
6350                                   LLVMContext &Context) const {
6351   SmallVector<CCValAssign, 16> RVLocs;
6352   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6353   return CCInfo.CheckReturn(Outs, RetCC_PPC);
6354 }
6355 
/// Lower an outgoing return into a PPCISD::RET_FLAG node.
///
/// Values are assigned to registers by RetCC_PPC, extended to their
/// location width as needed, copied into the physical registers, and the
/// registers are appended as operands of the final RET_FLAG node.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  // Let the PPC return-value convention assign a register to each value.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);

  SDValue Flag;
  // RetOps[0] is the chain; register operands are appended after it.
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[i];

    // Widen the value to its assigned location width using the extension
    // kind the calling convention recorded.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    // Thread the copies together through the glue value so they stay
    // adjacent to the return.
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // If the target reports callee-saved registers restored via explicit
  // copies, list them on the return so they are kept live up to it.
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
    TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      // The MVT used here only tags the register operand per register
      // class; note VR registers are tagged with MVT::Other.
      if (PPC::G8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (PPC::F8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else if (PPC::CRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
      else if (PPC::VRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
6423 
6424 SDValue
6425 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6426                                                 SelectionDAG &DAG) const {
6427   SDLoc dl(Op);
6428 
6429   // Get the corect type for integers.
6430   EVT IntVT = Op.getValueType();
6431 
6432   // Get the inputs.
6433   SDValue Chain = Op.getOperand(0);
6434   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6435   // Build a DYNAREAOFFSET node.
6436   SDValue Ops[2] = {Chain, FPSIdx};
6437   SDVTList VTs = DAG.getVTList(IntVT);
6438   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6439 }
6440 
6441 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6442                                              SelectionDAG &DAG) const {
6443   // When we pop the dynamic allocation we need to restore the SP link.
6444   SDLoc dl(Op);
6445 
6446   // Get the corect type for pointers.
6447   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6448 
6449   // Construct the stack pointer operand.
6450   bool isPPC64 = Subtarget.isPPC64();
6451   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6452   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6453 
6454   // Get the operands for the STACKRESTORE.
6455   SDValue Chain = Op.getOperand(0);
6456   SDValue SaveSP = Op.getOperand(1);
6457 
6458   // Load the old link SP.
6459   SDValue LoadLinkSP =
6460       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6461 
6462   // Restore the stack pointer.
6463   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6464 
6465   // Store the old link SP.
6466   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6467 }
6468 
/// Create (or return the cached) fixed-object frame index for the slot in
/// which the return address is saved.
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index; it is cached in the
  // function info so the same slot is reused across queries.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the return (LR) save area for this ABI.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area
    // (pointer-sized: 8 bytes on PPC64, 4 on PPC32).
    RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}
6490 
6491 SDValue
6492 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6493   MachineFunction &MF = DAG.getMachineFunction();
6494   bool isPPC64 = Subtarget.isPPC64();
6495   EVT PtrVT = getPointerTy(MF.getDataLayout());
6496 
6497   // Get current frame pointer save index.  The users of this index will be
6498   // primarily DYNALLOC instructions.
6499   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6500   int FPSI = FI->getFramePointerSaveIndex();
6501 
6502   // If the frame pointer save index hasn't been defined yet.
6503   if (!FPSI) {
6504     // Find out what the fix offset of the frame pointer save area.
6505     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6506     // Allocate the frame index for frame pointer save area.
6507     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6508     // Save the result.
6509     FI->setFramePointerSaveIndex(FPSI);
6510   }
6511   return DAG.getFrameIndex(FPSI, PtrVT);
6512 }
6513 
6514 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6515                                                    SelectionDAG &DAG) const {
6516   // Get the inputs.
6517   SDValue Chain = Op.getOperand(0);
6518   SDValue Size  = Op.getOperand(1);
6519   SDLoc dl(Op);
6520 
6521   // Get the corect type for pointers.
6522   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6523   // Negate the size.
6524   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6525                                 DAG.getConstant(0, dl, PtrVT), Size);
6526   // Construct a node for the frame pointer save index.
6527   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6528   // Build a DYNALLOC node.
6529   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
6530   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
6531   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
6532 }
6533 
6534 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
6535                                                      SelectionDAG &DAG) const {
6536   MachineFunction &MF = DAG.getMachineFunction();
6537 
6538   bool isPPC64 = Subtarget.isPPC64();
6539   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6540 
6541   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
6542   return DAG.getFrameIndex(FI, PtrVT);
6543 }
6544 
6545 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
6546                                                SelectionDAG &DAG) const {
6547   SDLoc DL(Op);
6548   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
6549                      DAG.getVTList(MVT::i32, MVT::Other),
6550                      Op.getOperand(0), Op.getOperand(1));
6551 }
6552 
6553 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
6554                                                 SelectionDAG &DAG) const {
6555   SDLoc DL(Op);
6556   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
6557                      Op.getOperand(0), Op.getOperand(1));
6558 }
6559 
6560 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
6561   if (Op.getValueType().isVector())
6562     return LowerVectorLoad(Op, DAG);
6563 
6564   assert(Op.getValueType() == MVT::i1 &&
6565          "Custom lowering only for i1 loads");
6566 
6567   // First, load 8 bits into 32 bits, then truncate to 1 bit.
6568 
6569   SDLoc dl(Op);
6570   LoadSDNode *LD = cast<LoadSDNode>(Op);
6571 
6572   SDValue Chain = LD->getChain();
6573   SDValue BasePtr = LD->getBasePtr();
6574   MachineMemOperand *MMO = LD->getMemOperand();
6575 
6576   SDValue NewLD =
6577       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
6578                      BasePtr, MVT::i8, MMO);
6579   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
6580 
6581   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
6582   return DAG.getMergeValues(Ops, dl);
6583 }
6584 
6585 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
6586   if (Op.getOperand(1).getValueType().isVector())
6587     return LowerVectorStore(Op, DAG);
6588 
6589   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
6590          "Custom lowering only for i1 stores");
6591 
6592   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
6593 
6594   SDLoc dl(Op);
6595   StoreSDNode *ST = cast<StoreSDNode>(Op);
6596 
6597   SDValue Chain = ST->getChain();
6598   SDValue BasePtr = ST->getBasePtr();
6599   SDValue Value = ST->getValue();
6600   MachineMemOperand *MMO = ST->getMemOperand();
6601 
6602   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
6603                       Value);
6604   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
6605 }
6606 
6607 // FIXME: Remove this once the ANDI glue bug is fixed:
6608 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
6609   assert(Op.getValueType() == MVT::i1 &&
6610          "Custom lowering only for i1 results");
6611 
6612   SDLoc DL(Op);
6613   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
6614                      Op.getOperand(0));
6615 }
6616 
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
///
/// fsel natively computes "A >= 0.0 ? B : C" (see the setge comments
/// below), so each supported condition is rewritten into that form by
/// negating/subtracting the comparison operands and/or swapping the
/// true/false values.  fsel ignores NaN ordering, hence the
/// no-infs/no-nans restriction.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
  // TODO: Propagate flags from the select rather than global settings.
  SDNodeFlags Flags;
  Flags.setNoInfs(true);
  Flags.setNoNaNs(true);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      LLVM_FALLTHROUGH;
    case ISD::SETEQ:
      // Equality needs two fsels: the first selects assuming LHS >= 0; a
      // second fsel on -LHS then filters out the LHS > 0 case.
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOLE:
    case ISD::SETLE:
      // LHS <= 0 is checked as -LHS >= 0 (valid under no-NaNs).
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: compare against zero by forming LHS - RHS (or RHS - LHS
  // for the mirrored conditions) and feeding the difference to fsel.
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    LLVM_FALLTHROUGH;
  case ISD::SETEQ:
    // Same two-fsel construction as the zero-RHS SETEQ case above.
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  // Unhandled condition code: leave the node for the default expansion.
  return Op;
}
6724 
/// Convert the FP operand of \p Op to an integer through a stack slot,
/// recording in \p RLI the chain, pointer, and pointer info a caller needs
/// to emit the integer reload from that slot.
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  // The convert instructions operate on f64; widen f32 sources first.
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  // Select the convert opcode by signedness and result width.
  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT there is no unsigned word convert (FCTIWUZ), so
    // FP_TO_UINT falls back to the doubleword form.
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.  Use a 4-byte
  // slot (via STFIWX) for a word result when the target supports it and
  // the conversion produced a valid word; otherwise spill the full f64.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    // STFIWX needs a memory-intrinsic node carrying an explicit 4-byte
    // memory operand.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  // Hand the reload ingredients back to the caller.
  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}
6784 
6785 /// \brief Custom lowers floating point to integer conversions to use
6786 /// the direct move instructions available in ISA 2.07 to avoid the
6787 /// need for load/store combinations.
6788 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
6789                                                     SelectionDAG &DAG,
6790                                                     const SDLoc &dl) const {
6791   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6792   SDValue Src = Op.getOperand(0);
6793 
6794   if (Src.getValueType() == MVT::f32)
6795     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6796 
6797   SDValue Tmp;
6798   switch (Op.getSimpleValueType().SimpleTy) {
6799   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6800   case MVT::i32:
6801     Tmp = DAG.getNode(
6802         Op.getOpcode() == ISD::FP_TO_SINT
6803             ? PPCISD::FCTIWZ
6804             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6805         dl, MVT::f64, Src);
6806     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
6807     break;
6808   case MVT::i64:
6809     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6810            "i64 FP_TO_UINT is supported only with FPCVT");
6811     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6812                                                         PPCISD::FCTIDUZ,
6813                       dl, MVT::f64, Src);
6814     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
6815     break;
6816   }
6817   return Tmp;
6818 }
6819 
6820 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
6821                                           const SDLoc &dl) const {
6822   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6823     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
6824 
6825   ReuseLoadInfo RLI;
6826   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6827 
6828   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6829                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6830 }
6831 
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
//
// Returns true and fills in \p RLI when \p Op can supply a reusable address
// for a load of \p MemVT with extension type \p ET.
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  // A legal-or-custom FP_TO_*INT goes through a stack slot anyway; perform
  // that lowering now and reuse its slot as the load address.
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  // Otherwise the value must itself come from a plain load of the right
  // memory type; volatile or non-temporal loads cannot be reused.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    // Fold a pre-increment offset into the reused pointer.
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  // Copy over everything needed to recreate an equivalent load.
  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  // Remember the original load's chain result so spliceIntoChain can merge
  // the new load into the memory ordering (indexed loads yield the chain
  // as result 2, plain loads as result 1).
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
6881 
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  // Nothing to splice if no old chain was recorded (e.g. the address came
  // from LowerFP_TO_INTForReuse rather than an existing load).
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  // Build the token factor with an UNDEF placeholder first: if ResChain
  // were already an operand, the RAUW below would rewrite the token
  // factor's own operand and create a cycle.
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  // Redirect all users of the old chain to the token factor, then patch
  // the placeholder so the token factor joins both chains.
  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
6902 
6903 /// \brief Analyze profitability of direct move
6904 /// prefer float load to int load plus direct move
6905 /// when there is no integer use of int load
6906 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
6907   SDNode *Origin = Op.getOperand(0).getNode();
6908   if (Origin->getOpcode() != ISD::LOAD)
6909     return true;
6910 
6911   // If there is no LXSIBZX/LXSIHZX, like Power8,
6912   // prefer direct move if the memory size is 1 or 2 bytes.
6913   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
6914   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
6915     return true;
6916 
6917   for (SDNode::use_iterator UI = Origin->use_begin(),
6918                             UE = Origin->use_end();
6919        UI != UE; ++UI) {
6920 
6921     // Only look at the users of the loaded value.
6922     if (UI.getUse().get().getResNo() != 0)
6923       continue;
6924 
6925     if (UI->getOpcode() != ISD::SINT_TO_FP &&
6926         UI->getOpcode() != ISD::UINT_TO_FP)
6927       return true;
6928   }
6929 
6930   return false;
6931 }
6932 
6933 /// \brief Custom lowers integer to floating point conversions to use
6934 /// the direct move instructions available in ISA 2.07 to avoid the
6935 /// need for load/store combinations.
6936 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
6937                                                     SelectionDAG &DAG,
6938                                                     const SDLoc &dl) const {
6939   assert((Op.getValueType() == MVT::f32 ||
6940           Op.getValueType() == MVT::f64) &&
6941          "Invalid floating point type as target of conversion");
6942   assert(Subtarget.hasFPCVT() &&
6943          "Int to FP conversions with direct moves require FPCVT");
6944   SDValue FP;
6945   SDValue Src = Op.getOperand(0);
6946   bool SinglePrec = Op.getValueType() == MVT::f32;
6947   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
6948   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
6949   unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6950                              (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6951 
6952   if (WordInt) {
6953     FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6954                      dl, MVT::f64, Src);
6955     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6956   }
6957   else {
6958     FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6959     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6960   }
6961 
6962   return FP;
6963 }
6964 
6965 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
6966                                           SelectionDAG &DAG) const {
6967   SDLoc dl(Op);
6968 
6969   if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6970     if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6971       return SDValue();
6972 
6973     SDValue Value = Op.getOperand(0);
6974     // The values are now known to be -1 (false) or 1 (true). To convert this
6975     // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
6976     // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
6977     Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
6978 
6979     SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
6980 
6981     Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
6982 
6983     if (Op.getValueType() != MVT::v4f64)
6984       Value = DAG.getNode(ISD::FP_ROUND, dl,
6985                           Op.getValueType(), Value,
6986                           DAG.getIntPtrConstant(1, dl));
6987     return Value;
6988   }
6989 
6990   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
6991   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
6992     return SDValue();
6993 
6994   if (Op.getOperand(0).getValueType() == MVT::i1)
6995     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
6996                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
6997                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
6998 
6999   // If we have direct moves, we can do all the conversion, skip the store/load
7000   // however, without FPCVT we can't do most conversions.
7001   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
7002       Subtarget.isPPC64() && Subtarget.hasFPCVT())
7003     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
7004 
7005   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
7006          "UINT_TO_FP is supported only with FPCVT");
7007 
7008   // If we have FCFIDS, then use it when converting to single-precision.
7009   // Otherwise, convert to double-precision and then round.
7010   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7011                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7012                                                             : PPCISD::FCFIDS)
7013                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7014                                                             : PPCISD::FCFID);
7015   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7016                   ? MVT::f32
7017                   : MVT::f64;
7018 
7019   if (Op.getOperand(0).getValueType() == MVT::i64) {
7020     SDValue SINT = Op.getOperand(0);
7021     // When converting to single-precision, we actually need to convert
7022     // to double-precision first and then round to single-precision.
7023     // To avoid double-rounding effects during that operation, we have
7024     // to prepare the input operand.  Bits that might be truncated when
7025     // converting to double-precision are replaced by a bit that won't
7026     // be lost at this stage, but is below the single-precision rounding
7027     // position.
7028     //
7029     // However, if -enable-unsafe-fp-math is in effect, accept double
7030     // rounding to avoid the extra overhead.
7031     if (Op.getValueType() == MVT::f32 &&
7032         !Subtarget.hasFPCVT() &&
7033         !DAG.getTarget().Options.UnsafeFPMath) {
7034 
7035       // Twiddle input to make sure the low 11 bits are zero.  (If this
7036       // is the case, we are guaranteed the value will fit into the 53 bit
7037       // mantissa of an IEEE double-precision value without rounding.)
7038       // If any of those low 11 bits were not zero originally, make sure
7039       // bit 12 (value 2048) is set instead, so that the final rounding
7040       // to single-precision gets the correct result.
7041       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7042                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
7043       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
7044                           Round, DAG.getConstant(2047, dl, MVT::i64));
7045       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
7046       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7047                           Round, DAG.getConstant(-2048, dl, MVT::i64));
7048 
7049       // However, we cannot use that value unconditionally: if the magnitude
7050       // of the input value is small, the bit-twiddling we did above might
7051       // end up visibly changing the output.  Fortunately, in that case, we
7052       // don't need to twiddle bits since the original input will convert
7053       // exactly to double-precision floating-point already.  Therefore,
7054       // construct a conditional to use the original value if the top 11
7055       // bits are all sign-bit copies, and use the rounded value computed
7056       // above otherwise.
7057       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
7058                                  SINT, DAG.getConstant(53, dl, MVT::i32));
7059       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
7060                          Cond, DAG.getConstant(1, dl, MVT::i64));
7061       Cond = DAG.getSetCC(dl, MVT::i32,
7062                           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
7063 
7064       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
7065     }
7066 
7067     ReuseLoadInfo RLI;
7068     SDValue Bits;
7069 
7070     MachineFunction &MF = DAG.getMachineFunction();
7071     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7072       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7073                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7074       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7075     } else if (Subtarget.hasLFIWAX() &&
7076                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7077       MachineMemOperand *MMO =
7078         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7079                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7080       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7081       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
7082                                      DAG.getVTList(MVT::f64, MVT::Other),
7083                                      Ops, MVT::i32, MMO);
7084       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7085     } else if (Subtarget.hasFPCVT() &&
7086                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7087       MachineMemOperand *MMO =
7088         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7089                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7090       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7091       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
7092                                      DAG.getVTList(MVT::f64, MVT::Other),
7093                                      Ops, MVT::i32, MMO);
7094       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7095     } else if (((Subtarget.hasLFIWAX() &&
7096                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7097                 (Subtarget.hasFPCVT() &&
7098                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7099                SINT.getOperand(0).getValueType() == MVT::i32) {
7100       MachineFrameInfo &MFI = MF.getFrameInfo();
7101       EVT PtrVT = getPointerTy(DAG.getDataLayout());
7102 
7103       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7104       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7105 
7106       SDValue Store =
7107           DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
7108                        MachinePointerInfo::getFixedStack(
7109                            DAG.getMachineFunction(), FrameIdx));
7110 
7111       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7112              "Expected an i32 store");
7113 
7114       RLI.Ptr = FIdx;
7115       RLI.Chain = Store;
7116       RLI.MPI =
7117           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7118       RLI.Alignment = 4;
7119 
7120       MachineMemOperand *MMO =
7121         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7122                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7123       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7124       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7125                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
7126                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
7127                                      Ops, MVT::i32, MMO);
7128     } else
7129       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
7130 
7131     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
7132 
7133     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7134       FP = DAG.getNode(ISD::FP_ROUND, dl,
7135                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
7136     return FP;
7137   }
7138 
7139   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
7140          "Unhandled INT_TO_FP type in custom expander!");
7141   // Since we only generate this in 64-bit mode, we can take advantage of
7142   // 64-bit registers.  In particular, sign extend the input value into the
7143   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
7144   // then lfd it and fcfid it.
7145   MachineFunction &MF = DAG.getMachineFunction();
7146   MachineFrameInfo &MFI = MF.getFrameInfo();
7147   EVT PtrVT = getPointerTy(MF.getDataLayout());
7148 
7149   SDValue Ld;
7150   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7151     ReuseLoadInfo RLI;
7152     bool ReusingLoad;
7153     if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
7154                                             DAG))) {
7155       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7156       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7157 
7158       SDValue Store =
7159           DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7160                        MachinePointerInfo::getFixedStack(
7161                            DAG.getMachineFunction(), FrameIdx));
7162 
7163       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7164              "Expected an i32 store");
7165 
7166       RLI.Ptr = FIdx;
7167       RLI.Chain = Store;
7168       RLI.MPI =
7169           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7170       RLI.Alignment = 4;
7171     }
7172 
7173     MachineMemOperand *MMO =
7174       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7175                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7176     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7177     Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7178                                    PPCISD::LFIWZX : PPCISD::LFIWAX,
7179                                  dl, DAG.getVTList(MVT::f64, MVT::Other),
7180                                  Ops, MVT::i32, MMO);
7181     if (ReusingLoad)
7182       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
7183   } else {
7184     assert(Subtarget.isPPC64() &&
7185            "i32->FP without LFIWAX supported only on PPC64");
7186 
7187     int FrameIdx = MFI.CreateStackObject(8, 8, false);
7188     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7189 
7190     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
7191                                 Op.getOperand(0));
7192 
7193     // STD the extended value into the stack slot.
7194     SDValue Store = DAG.getStore(
7195         DAG.getEntryNode(), dl, Ext64, FIdx,
7196         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7197 
7198     // Load the value as a double.
7199     Ld = DAG.getLoad(
7200         MVT::f64, dl, Store, FIdx,
7201         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7202   }
7203 
7204   // FCFID it and return it.
7205   SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
7206   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7207     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
7208                      DAG.getIntPtrConstant(0, dl));
7209   return FP;
7210 }
7211 
// Lower FLT_ROUNDS_ by reading the FPSCR rounding-mode field and remapping
// it to the C99 FLT_ROUNDS encoding.
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  // MFFS materializes the FPSCR contents into an f64 value.  Despite its
  // name, 'Chain' below is that f64 result (value 0 of the node), not a
  // token chain -- it is what gets stored to the stack slot.
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
                               MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  // NOTE(review): reading the low word at offset 4 assumes big-endian
  // layout of the stored f64 -- confirm this path is not reached on
  // little-endian subtargets.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());

  // Transform as necessary
  // CWD1 = FPSCR & 3: the raw 2-bit rounding-mode field.
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  // CWD2 = ((FPSCR ^ 3) & 3) >> 1: the complemented field, shifted; the
  // XOR with 3 implements ~FPSCR restricted to the low two bits.
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  // Combine per the formula above to produce the FLT_ROUNDS value in i32.
  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Resize the i32 result to the requested result type.
  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
7274 
7275 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7276   EVT VT = Op.getValueType();
7277   unsigned BitWidth = VT.getSizeInBits();
7278   SDLoc dl(Op);
7279   assert(Op.getNumOperands() == 3 &&
7280          VT == Op.getOperand(1).getValueType() &&
7281          "Unexpected SHL!");
7282 
7283   // Expand into a bunch of logical ops.  Note that these ops
7284   // depend on the PPC behavior for oversized shift amounts.
7285   SDValue Lo = Op.getOperand(0);
7286   SDValue Hi = Op.getOperand(1);
7287   SDValue Amt = Op.getOperand(2);
7288   EVT AmtVT = Amt.getValueType();
7289 
7290   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7291                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7292   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
7293   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
7294   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
7295   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7296                              DAG.getConstant(-BitWidth, dl, AmtVT));
7297   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7298   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7299   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7300   SDValue OutOps[] = { OutLo, OutHi };
7301   return DAG.getMergeValues(OutOps, dl);
7302 }
7303 
7304 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7305   EVT VT = Op.getValueType();
7306   SDLoc dl(Op);
7307   unsigned BitWidth = VT.getSizeInBits();
7308   assert(Op.getNumOperands() == 3 &&
7309          VT == Op.getOperand(1).getValueType() &&
7310          "Unexpected SRL!");
7311 
7312   // Expand into a bunch of logical ops.  Note that these ops
7313   // depend on the PPC behavior for oversized shift amounts.
7314   SDValue Lo = Op.getOperand(0);
7315   SDValue Hi = Op.getOperand(1);
7316   SDValue Amt = Op.getOperand(2);
7317   EVT AmtVT = Amt.getValueType();
7318 
7319   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7320                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7321   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7322   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7323   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7324   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7325                              DAG.getConstant(-BitWidth, dl, AmtVT));
7326   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
7327   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7328   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
7329   SDValue OutOps[] = { OutLo, OutHi };
7330   return DAG.getMergeValues(OutOps, dl);
7331 }
7332 
7333 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
7334   SDLoc dl(Op);
7335   EVT VT = Op.getValueType();
7336   unsigned BitWidth = VT.getSizeInBits();
7337   assert(Op.getNumOperands() == 3 &&
7338          VT == Op.getOperand(1).getValueType() &&
7339          "Unexpected SRA!");
7340 
7341   // Expand into a bunch of logical ops, followed by a select_cc.
7342   SDValue Lo = Op.getOperand(0);
7343   SDValue Hi = Op.getOperand(1);
7344   SDValue Amt = Op.getOperand(2);
7345   EVT AmtVT = Amt.getValueType();
7346 
7347   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7348                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7349   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7350   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7351   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7352   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7353                              DAG.getConstant(-BitWidth, dl, AmtVT));
7354   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
7355   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
7356   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
7357                                   Tmp4, Tmp6, ISD::SETLE);
7358   SDValue OutOps[] = { OutLo, OutHi };
7359   return DAG.getMergeValues(OutOps, dl);
7360 }
7361 
7362 //===----------------------------------------------------------------------===//
7363 // Vector related lowering.
7364 //
7365 
7366 /// BuildSplatI - Build a canonical splati of Val with an element size of
7367 /// SplatSize.  Cast the result to VT.
7368 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
7369                            SelectionDAG &DAG, const SDLoc &dl) {
7370   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
7371 
7372   static const MVT VTys[] = { // canonical VT to use for each size.
7373     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
7374   };
7375 
7376   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
7377 
7378   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
7379   if (Val == -1)
7380     SplatSize = 1;
7381 
7382   EVT CanonicalVT = VTys[SplatSize-1];
7383 
7384   // Build a canonical splat for this value.
7385   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
7386 }
7387 
7388 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
7389 /// specified intrinsic ID.
7390 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
7391                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
7392   if (DestVT == MVT::Other) DestVT = Op.getValueType();
7393   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7394                      DAG.getConstant(IID, dl, MVT::i32), Op);
7395 }
7396 
7397 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
7398 /// specified intrinsic ID.
7399 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
7400                                 SelectionDAG &DAG, const SDLoc &dl,
7401                                 EVT DestVT = MVT::Other) {
7402   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
7403   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7404                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
7405 }
7406 
7407 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
7408 /// specified intrinsic ID.
7409 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
7410                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
7411                                 EVT DestVT = MVT::Other) {
7412   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
7413   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7414                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
7415 }
7416 
7417 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
7418 /// amount.  The result has the specified value type.
7419 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
7420                            SelectionDAG &DAG, const SDLoc &dl) {
7421   // Force LHS/RHS to be the right type.
7422   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
7423   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
7424 
7425   int Ops[16];
7426   for (unsigned i = 0; i != 16; ++i)
7427     Ops[i] = i + Amt;
7428   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
7429   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7430 }
7431 
7432 /// Do we have an efficient pattern in a .td file for this node?
7433 ///
7434 /// \param V - pointer to the BuildVectorSDNode being matched
7435 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
7436 ///
7437 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
7438 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
7439 /// the opposite is true (expansion is beneficial) are:
7440 /// - The node builds a vector out of integers that are not 32 or 64-bits
7441 /// - The node builds a vector out of constants
7442 /// - The node is a "load-and-splat"
7443 /// In all other cases, we will choose to keep the BUILD_VECTOR.
7444 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
7445                                             bool HasDirectMove) {
7446   EVT VecVT = V->getValueType(0);
7447   bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 ||
7448     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
7449   if (!RightType)
7450     return false;
7451 
7452   bool IsSplat = true;
7453   bool IsLoad = false;
7454   SDValue Op0 = V->getOperand(0);
7455 
7456   // This function is called in a block that confirms the node is not a constant
7457   // splat. So a constant BUILD_VECTOR here means the vector is built out of
7458   // different constants.
7459   if (V->isConstant())
7460     return false;
7461   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
7462     if (V->getOperand(i).isUndef())
7463       return false;
7464     // We want to expand nodes that represent load-and-splat even if the
7465     // loaded value is a floating point truncation or conversion to int.
7466     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
7467         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
7468          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7469         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
7470          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7471         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
7472          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
7473       IsLoad = true;
7474     // If the operands are different or the input is not a load and has more
7475     // uses than just this BV node, then it isn't a splat.
7476     if (V->getOperand(i) != Op0 ||
7477         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
7478       IsSplat = false;
7479   }
7480   return !(IsSplat && IsLoad);
7481 }
7482 
7483 // If this is a case we can't handle, return null and let the default
7484 // expansion code take care of it.  If we CAN select this case, and if it
7485 // selects to a single instruction, return Op.  Otherwise, if we can codegen
7486 // this case more efficiently than a constant pool load, lower it to the
7487 // sequence of ops that should be used.
7488 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
7489                                              SelectionDAG &DAG) const {
7490   SDLoc dl(Op);
7491   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7492   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
7493 
7494   if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
7495     // We first build an i32 vector, load it into a QPX register,
7496     // then convert it to a floating-point vector and compare it
7497     // to a zero vector to get the boolean result.
7498     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7499     int FrameIdx = MFI.CreateStackObject(16, 16, false);
7500     MachinePointerInfo PtrInfo =
7501         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7502     EVT PtrVT = getPointerTy(DAG.getDataLayout());
7503     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7504 
7505     assert(BVN->getNumOperands() == 4 &&
7506       "BUILD_VECTOR for v4i1 does not have 4 operands");
7507 
7508     bool IsConst = true;
7509     for (unsigned i = 0; i < 4; ++i) {
7510       if (BVN->getOperand(i).isUndef()) continue;
7511       if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
7512         IsConst = false;
7513         break;
7514       }
7515     }
7516 
7517     if (IsConst) {
7518       Constant *One =
7519         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
7520       Constant *NegOne =
7521         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
7522 
7523       Constant *CV[4];
7524       for (unsigned i = 0; i < 4; ++i) {
7525         if (BVN->getOperand(i).isUndef())
7526           CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
7527         else if (isNullConstant(BVN->getOperand(i)))
7528           CV[i] = NegOne;
7529         else
7530           CV[i] = One;
7531       }
7532 
7533       Constant *CP = ConstantVector::get(CV);
7534       SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
7535                                           16 /* alignment */);
7536 
7537       SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
7538       SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
7539       return DAG.getMemIntrinsicNode(
7540           PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
7541           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7542     }
7543 
7544     SmallVector<SDValue, 4> Stores;
7545     for (unsigned i = 0; i < 4; ++i) {
7546       if (BVN->getOperand(i).isUndef()) continue;
7547 
7548       unsigned Offset = 4*i;
7549       SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7550       Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7551 
7552       unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
7553       if (StoreSize > 4) {
7554         Stores.push_back(
7555             DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
7556                               PtrInfo.getWithOffset(Offset), MVT::i32));
7557       } else {
7558         SDValue StoreValue = BVN->getOperand(i);
7559         if (StoreSize < 4)
7560           StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
7561 
7562         Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
7563                                       PtrInfo.getWithOffset(Offset)));
7564       }
7565     }
7566 
7567     SDValue StoreChain;
7568     if (!Stores.empty())
7569       StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7570     else
7571       StoreChain = DAG.getEntryNode();
7572 
7573     // Now load from v4i32 into the QPX register; this will extend it to
7574     // v4i64 but not yet convert it to a floating point. Nevertheless, this
7575     // is typed as v4f64 because the QPX register integer states are not
7576     // explicitly represented.
7577 
7578     SDValue Ops[] = {StoreChain,
7579                      DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
7580                      FIdx};
7581     SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
7582 
7583     SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
7584       dl, VTs, Ops, MVT::v4i32, PtrInfo);
7585     LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7586       DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
7587       LoadedVect);
7588 
7589     SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
7590 
7591     return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
7592   }
7593 
7594   // All other QPX vectors are handled by generic code.
7595   if (Subtarget.hasQPX())
7596     return SDValue();
7597 
7598   // Check if this is a splat of a constant value.
7599   APInt APSplatBits, APSplatUndef;
7600   unsigned SplatBitSize;
7601   bool HasAnyUndefs;
7602   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
7603                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
7604       SplatBitSize > 32) {
7605     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
7606     // lowered to VSX instructions under certain conditions.
7607     // Without VSX, there is no pattern more efficient than expanding the node.
7608     if (Subtarget.hasVSX() &&
7609         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove()))
7610       return Op;
7611     return SDValue();
7612   }
7613 
7614   unsigned SplatBits = APSplatBits.getZExtValue();
7615   unsigned SplatUndef = APSplatUndef.getZExtValue();
7616   unsigned SplatSize = SplatBitSize / 8;
7617 
7618   // First, handle single instruction cases.
7619 
7620   // All zeros?
7621   if (SplatBits == 0) {
7622     // Canonicalize all zero vectors to be v4i32.
7623     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
7624       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
7625       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
7626     }
7627     return Op;
7628   }
7629 
7630   // We have XXSPLTIB for constant splats one byte wide
7631   if (Subtarget.hasP9Vector() && SplatSize == 1) {
7632     // This is a splat of 1-byte elements with some elements potentially undef.
7633     // Rather than trying to match undef in the SDAG patterns, ensure that all
7634     // elements are the same constant.
7635     if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
7636       SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
7637                                                        dl, MVT::i32));
7638       SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
7639       if (Op.getValueType() != MVT::v16i8)
7640         return DAG.getBitcast(Op.getValueType(), NewBV);
7641       return NewBV;
7642     }
7643     return Op;
7644   }
7645 
7646   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
7647   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
7648                     (32-SplatBitSize));
7649   if (SextVal >= -16 && SextVal <= 15)
7650     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
7651 
7652   // Two instruction sequences.
7653 
7654   // If this value is in the range [-32,30] and is even, use:
7655   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
7656   // If this value is in the range [17,31] and is odd, use:
7657   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
7658   // If this value is in the range [-31,-17] and is odd, use:
7659   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
7660   // Note the last two are three-instruction sequences.
7661   if (SextVal >= -32 && SextVal <= 31) {
7662     // To avoid having these optimizations undone by constant folding,
7663     // we convert to a pseudo that will be expanded later into one of
7664     // the above forms.
7665     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
7666     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
7667               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
7668     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
7669     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
7670     if (VT == Op.getValueType())
7671       return RetVal;
7672     else
7673       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
7674   }
7675 
7676   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
7677   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
7678   // for fneg/fabs.
7679   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
7680     // Make -1 and vspltisw -1:
7681     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
7682 
7683     // Make the VSLW intrinsic, computing 0x8000_0000.
7684     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
7685                                    OnesV, DAG, dl);
7686 
7687     // xor by OnesV to invert it.
7688     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
7689     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7690   }
7691 
7692   // Check to see if this is a wide variety of vsplti*, binop self cases.
7693   static const signed char SplatCsts[] = {
7694     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
7695     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
7696   };
7697 
7698   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
7699     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
7700     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
7701     int i = SplatCsts[idx];
7702 
7703     // Figure out what shift amount will be used by altivec if shifted by i in
7704     // this splat size.
7705     unsigned TypeShiftAmt = i & (SplatBitSize-1);
7706 
7707     // vsplti + shl self.
7708     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
7709       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7710       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7711         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
7712         Intrinsic::ppc_altivec_vslw
7713       };
7714       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7715       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7716     }
7717 
7718     // vsplti + srl self.
7719     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7720       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7721       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7722         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
7723         Intrinsic::ppc_altivec_vsrw
7724       };
7725       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7726       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7727     }
7728 
7729     // vsplti + sra self.
7730     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7731       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7732       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7733         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
7734         Intrinsic::ppc_altivec_vsraw
7735       };
7736       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7737       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7738     }
7739 
7740     // vsplti + rol self.
7741     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7742                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
7743       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7744       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7745         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
7746         Intrinsic::ppc_altivec_vrlw
7747       };
7748       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7749       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7750     }
7751 
7752     // t = vsplti c, result = vsldoi t, t, 1
7753     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7754       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7755       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7756       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7757     }
7758     // t = vsplti c, result = vsldoi t, t, 2
7759     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7760       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7761       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7762       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7763     }
7764     // t = vsplti c, result = vsldoi t, t, 3
7765     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
7766       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7767       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
7768       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7769     }
7770   }
7771 
7772   return SDValue();
7773 }
7774 
7775 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7776 /// the specified operations to build the shuffle.
7777 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7778                                       SDValue RHS, SelectionDAG &DAG,
7779                                       const SDLoc &dl) {
7780   unsigned OpNum = (PFEntry >> 26) & 0x0F;
7781   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7782   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
7783 
7784   enum {
7785     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7786     OP_VMRGHW,
7787     OP_VMRGLW,
7788     OP_VSPLTISW0,
7789     OP_VSPLTISW1,
7790     OP_VSPLTISW2,
7791     OP_VSPLTISW3,
7792     OP_VSLDOI4,
7793     OP_VSLDOI8,
7794     OP_VSLDOI12
7795   };
7796 
7797   if (OpNum == OP_COPY) {
7798     if (LHSID == (1*9+2)*9+3) return LHS;
7799     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7800     return RHS;
7801   }
7802 
7803   SDValue OpLHS, OpRHS;
7804   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7805   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7806 
7807   int ShufIdxs[16];
7808   switch (OpNum) {
7809   default: llvm_unreachable("Unknown i32 permute!");
7810   case OP_VMRGHW:
7811     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
7812     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
7813     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
7814     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
7815     break;
7816   case OP_VMRGLW:
7817     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
7818     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
7819     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
7820     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
7821     break;
7822   case OP_VSPLTISW0:
7823     for (unsigned i = 0; i != 16; ++i)
7824       ShufIdxs[i] = (i&3)+0;
7825     break;
7826   case OP_VSPLTISW1:
7827     for (unsigned i = 0; i != 16; ++i)
7828       ShufIdxs[i] = (i&3)+4;
7829     break;
7830   case OP_VSPLTISW2:
7831     for (unsigned i = 0; i != 16; ++i)
7832       ShufIdxs[i] = (i&3)+8;
7833     break;
7834   case OP_VSPLTISW3:
7835     for (unsigned i = 0; i != 16; ++i)
7836       ShufIdxs[i] = (i&3)+12;
7837     break;
7838   case OP_VSLDOI4:
7839     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
7840   case OP_VSLDOI8:
7841     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
7842   case OP_VSLDOI12:
7843     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
7844   }
7845   EVT VT = OpLHS.getValueType();
7846   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
7847   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
7848   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
7849   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7850 }
7851 
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
///
/// The matchers below are tried in decreasing order of desirability: single
/// P9/VSX instructions first, then single-instruction Altivec immediates
/// (left as VECTOR_SHUFFLE for isel), then the perfect-shuffle table, and
/// finally a generic VPERM with a constant-pool mask.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  // P9 word insert: XXINSERT of one vector's word into the other, optionally
  // rotating the inserted source first (VECSHL) when ShiftElts is nonzero.
  unsigned ShiftElts, InsertAtByte;
  bool Swap;
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    // The matcher may require the two inputs to be exchanged.
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }


  // VSX word-granularity double-vector shift (xxsldwi pattern): lower to a
  // single VECSHL node.  An undef V2 is replaced by V1 for the second input.
  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  // VSX doubleword permute (xxpermdi pattern): select one doubleword from
  // each input according to the immediate in ShiftElts.
  if (Subtarget.hasVSX() &&
    PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  // P9 byte-reverse patterns: XXREVERSE at halfword/word/doubleword/quadword
  // granularity (xxbrh/xxbrw/xxbrd/xxbrq-style lowering).
  if (Subtarget.hasP9Vector()) {
     if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    // Word splat of a single input: XXSPLT.
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

      // If the source for the shuffle is a scalar_to_vector that came from a
      // 32-bit load, it will have used LXVWSX so we don't need to splat again.
      if (Subtarget.hasP9Vector() &&
          ((isLittleEndian && SplatIdx == 3) ||
           (!isLittleEndian && SplatIdx == 0))) {
        SDValue Src = V1.getOperand(0);
        if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
            Src.getOperand(0).getOpcode() == ISD::LOAD &&
            Src.getOperand(0).hasOneUse())
          return V1;
      }
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  // QPX only handles 4-element vectors; lower to qvaligni / qvesplati when the
  // mask matches, otherwise fall through to a qvgpci/qvfperm pair below.
  if (Subtarget.hasQPX()) {
    if (VT.getVectorNumElements() != 4)
      return SDValue();

    if (V2.isUndef()) V2 = V1;

    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
    if (AlignIdx != -1) {
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
    } else if (SVOp->isSplat()) {
      int SplatIdx = SVOp->getSplatIndex();
      // Splat indices >= 4 refer to the second input vector.
      if (SplatIdx >= 4) {
        std::swap(V1, V2);
        SplatIdx -= 4;
      }

      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
    }

    // Lower this into a qvgpci/qvfperm pair.

    // Compute the qvgpci literal: 3 bits per lane, packed most-significant
    // lane first; undef mask elements default to the identity index.
    unsigned idx = 0;
    for (unsigned i = 0; i < 4; ++i) {
      int m = SVOp->getMaskElt(i);
      unsigned mm = m >= 0 ? (unsigned) m : i;
      idx |= mm << (3-i)*3;
    }

    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
                             DAG.getConstant(idx, dl, MVT::i32));
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  // Reduce the byte-level mask to a 4-element word mask (8 = undef element),
  // rejecting any mask that mixes bytes from different words.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      // All bytes of a word element must keep their intra-word position...
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      // ...and must all come from the same source word.
      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    // Undef mask elements can safely read element 0.
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}
8132 
8133 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
8134 /// vector comparison.  If it is, return true and fill in Opc/isDot with
8135 /// information about the intrinsic.
8136 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
8137                                  bool &isDot, const PPCSubtarget &Subtarget) {
8138   unsigned IntrinsicID =
8139       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
8140   CompareOpc = -1;
8141   isDot = false;
8142   switch (IntrinsicID) {
8143   default:
8144     return false;
8145   // Comparison predicates.
8146   case Intrinsic::ppc_altivec_vcmpbfp_p:
8147     CompareOpc = 966;
8148     isDot = true;
8149     break;
8150   case Intrinsic::ppc_altivec_vcmpeqfp_p:
8151     CompareOpc = 198;
8152     isDot = true;
8153     break;
8154   case Intrinsic::ppc_altivec_vcmpequb_p:
8155     CompareOpc = 6;
8156     isDot = true;
8157     break;
8158   case Intrinsic::ppc_altivec_vcmpequh_p:
8159     CompareOpc = 70;
8160     isDot = true;
8161     break;
8162   case Intrinsic::ppc_altivec_vcmpequw_p:
8163     CompareOpc = 134;
8164     isDot = true;
8165     break;
8166   case Intrinsic::ppc_altivec_vcmpequd_p:
8167     if (Subtarget.hasP8Altivec()) {
8168       CompareOpc = 199;
8169       isDot = true;
8170     } else
8171       return false;
8172     break;
8173   case Intrinsic::ppc_altivec_vcmpneb_p:
8174   case Intrinsic::ppc_altivec_vcmpneh_p:
8175   case Intrinsic::ppc_altivec_vcmpnew_p:
8176   case Intrinsic::ppc_altivec_vcmpnezb_p:
8177   case Intrinsic::ppc_altivec_vcmpnezh_p:
8178   case Intrinsic::ppc_altivec_vcmpnezw_p:
8179     if (Subtarget.hasP9Altivec()) {
8180       switch (IntrinsicID) {
8181       default:
8182         llvm_unreachable("Unknown comparison intrinsic.");
8183       case Intrinsic::ppc_altivec_vcmpneb_p:
8184         CompareOpc = 7;
8185         break;
8186       case Intrinsic::ppc_altivec_vcmpneh_p:
8187         CompareOpc = 71;
8188         break;
8189       case Intrinsic::ppc_altivec_vcmpnew_p:
8190         CompareOpc = 135;
8191         break;
8192       case Intrinsic::ppc_altivec_vcmpnezb_p:
8193         CompareOpc = 263;
8194         break;
8195       case Intrinsic::ppc_altivec_vcmpnezh_p:
8196         CompareOpc = 327;
8197         break;
8198       case Intrinsic::ppc_altivec_vcmpnezw_p:
8199         CompareOpc = 391;
8200         break;
8201       }
8202       isDot = true;
8203     } else
8204       return false;
8205     break;
8206   case Intrinsic::ppc_altivec_vcmpgefp_p:
8207     CompareOpc = 454;
8208     isDot = true;
8209     break;
8210   case Intrinsic::ppc_altivec_vcmpgtfp_p:
8211     CompareOpc = 710;
8212     isDot = true;
8213     break;
8214   case Intrinsic::ppc_altivec_vcmpgtsb_p:
8215     CompareOpc = 774;
8216     isDot = true;
8217     break;
8218   case Intrinsic::ppc_altivec_vcmpgtsh_p:
8219     CompareOpc = 838;
8220     isDot = true;
8221     break;
8222   case Intrinsic::ppc_altivec_vcmpgtsw_p:
8223     CompareOpc = 902;
8224     isDot = true;
8225     break;
8226   case Intrinsic::ppc_altivec_vcmpgtsd_p:
8227     if (Subtarget.hasP8Altivec()) {
8228       CompareOpc = 967;
8229       isDot = true;
8230     } else
8231       return false;
8232     break;
8233   case Intrinsic::ppc_altivec_vcmpgtub_p:
8234     CompareOpc = 518;
8235     isDot = true;
8236     break;
8237   case Intrinsic::ppc_altivec_vcmpgtuh_p:
8238     CompareOpc = 582;
8239     isDot = true;
8240     break;
8241   case Intrinsic::ppc_altivec_vcmpgtuw_p:
8242     CompareOpc = 646;
8243     isDot = true;
8244     break;
8245   case Intrinsic::ppc_altivec_vcmpgtud_p:
8246     if (Subtarget.hasP8Altivec()) {
8247       CompareOpc = 711;
8248       isDot = true;
8249     } else
8250       return false;
8251     break;
8252 
8253   // VSX predicate comparisons use the same infrastructure
8254   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
8255   case Intrinsic::ppc_vsx_xvcmpgedp_p:
8256   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
8257   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
8258   case Intrinsic::ppc_vsx_xvcmpgesp_p:
8259   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
8260     if (Subtarget.hasVSX()) {
8261       switch (IntrinsicID) {
8262       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
8263         CompareOpc = 99;
8264         break;
8265       case Intrinsic::ppc_vsx_xvcmpgedp_p:
8266         CompareOpc = 115;
8267         break;
8268       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
8269         CompareOpc = 107;
8270         break;
8271       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
8272         CompareOpc = 67;
8273         break;
8274       case Intrinsic::ppc_vsx_xvcmpgesp_p:
8275         CompareOpc = 83;
8276         break;
8277       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
8278         CompareOpc = 75;
8279         break;
8280       }
8281       isDot = true;
8282     } else
8283       return false;
8284     break;
8285 
8286   // Normal Comparisons.
8287   case Intrinsic::ppc_altivec_vcmpbfp:
8288     CompareOpc = 966;
8289     break;
8290   case Intrinsic::ppc_altivec_vcmpeqfp:
8291     CompareOpc = 198;
8292     break;
8293   case Intrinsic::ppc_altivec_vcmpequb:
8294     CompareOpc = 6;
8295     break;
8296   case Intrinsic::ppc_altivec_vcmpequh:
8297     CompareOpc = 70;
8298     break;
8299   case Intrinsic::ppc_altivec_vcmpequw:
8300     CompareOpc = 134;
8301     break;
8302   case Intrinsic::ppc_altivec_vcmpequd:
8303     if (Subtarget.hasP8Altivec())
8304       CompareOpc = 199;
8305     else
8306       return false;
8307     break;
8308   case Intrinsic::ppc_altivec_vcmpneb:
8309   case Intrinsic::ppc_altivec_vcmpneh:
8310   case Intrinsic::ppc_altivec_vcmpnew:
8311   case Intrinsic::ppc_altivec_vcmpnezb:
8312   case Intrinsic::ppc_altivec_vcmpnezh:
8313   case Intrinsic::ppc_altivec_vcmpnezw:
8314     if (Subtarget.hasP9Altivec())
8315       switch (IntrinsicID) {
8316       default:
8317         llvm_unreachable("Unknown comparison intrinsic.");
8318       case Intrinsic::ppc_altivec_vcmpneb:
8319         CompareOpc = 7;
8320         break;
8321       case Intrinsic::ppc_altivec_vcmpneh:
8322         CompareOpc = 71;
8323         break;
8324       case Intrinsic::ppc_altivec_vcmpnew:
8325         CompareOpc = 135;
8326         break;
8327       case Intrinsic::ppc_altivec_vcmpnezb:
8328         CompareOpc = 263;
8329         break;
8330       case Intrinsic::ppc_altivec_vcmpnezh:
8331         CompareOpc = 327;
8332         break;
8333       case Intrinsic::ppc_altivec_vcmpnezw:
8334         CompareOpc = 391;
8335         break;
8336       }
8337     else
8338       return false;
8339     break;
8340   case Intrinsic::ppc_altivec_vcmpgefp:
8341     CompareOpc = 454;
8342     break;
8343   case Intrinsic::ppc_altivec_vcmpgtfp:
8344     CompareOpc = 710;
8345     break;
8346   case Intrinsic::ppc_altivec_vcmpgtsb:
8347     CompareOpc = 774;
8348     break;
8349   case Intrinsic::ppc_altivec_vcmpgtsh:
8350     CompareOpc = 838;
8351     break;
8352   case Intrinsic::ppc_altivec_vcmpgtsw:
8353     CompareOpc = 902;
8354     break;
8355   case Intrinsic::ppc_altivec_vcmpgtsd:
8356     if (Subtarget.hasP8Altivec())
8357       CompareOpc = 967;
8358     else
8359       return false;
8360     break;
8361   case Intrinsic::ppc_altivec_vcmpgtub:
8362     CompareOpc = 518;
8363     break;
8364   case Intrinsic::ppc_altivec_vcmpgtuh:
8365     CompareOpc = 582;
8366     break;
8367   case Intrinsic::ppc_altivec_vcmpgtuw:
8368     CompareOpc = 646;
8369     break;
8370   case Intrinsic::ppc_altivec_vcmpgtud:
8371     if (Subtarget.hasP8Altivec())
8372       CompareOpc = 711;
8373     else
8374       return false;
8375     break;
8376   }
8377   return true;
8378 }
8379 
8380 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
8381 /// lower, do it, otherwise return null.
8382 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8383                                                    SelectionDAG &DAG) const {
8384   unsigned IntrinsicID =
8385     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8386 
8387   if (IntrinsicID == Intrinsic::thread_pointer) {
8388     // Reads the thread pointer register, used for __builtin_thread_pointer.
8389     if (Subtarget.isPPC64())
8390       return DAG.getRegister(PPC::X13, MVT::i64);
8391     return DAG.getRegister(PPC::R2, MVT::i32);
8392   }
8393 
8394   // If this is a lowered altivec predicate compare, CompareOpc is set to the
8395   // opcode number of the comparison.
8396   SDLoc dl(Op);
8397   int CompareOpc;
8398   bool isDot;
8399   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
8400     return SDValue();    // Don't custom lower most intrinsics.
8401 
8402   // If this is a non-dot comparison, make the VCMP node and we are done.
8403   if (!isDot) {
8404     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
8405                               Op.getOperand(1), Op.getOperand(2),
8406                               DAG.getConstant(CompareOpc, dl, MVT::i32));
8407     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
8408   }
8409 
8410   // Create the PPCISD altivec 'dot' comparison node.
8411   SDValue Ops[] = {
8412     Op.getOperand(2),  // LHS
8413     Op.getOperand(3),  // RHS
8414     DAG.getConstant(CompareOpc, dl, MVT::i32)
8415   };
8416   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
8417   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
8418 
8419   // Now that we have the comparison, emit a copy from the CR to a GPR.
8420   // This is flagged to the above dot comparison.
8421   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
8422                                 DAG.getRegister(PPC::CR6, MVT::i32),
8423                                 CompNode.getValue(1));
8424 
8425   // Unpack the result based on how the target uses it.
8426   unsigned BitNo;   // Bit # of CR6.
8427   bool InvertBit;   // Invert result?
8428   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
8429   default:  // Can't happen, don't crash on invalid number though.
8430   case 0:   // Return the value of the EQ bit of CR6.
8431     BitNo = 0; InvertBit = false;
8432     break;
8433   case 1:   // Return the inverted value of the EQ bit of CR6.
8434     BitNo = 0; InvertBit = true;
8435     break;
8436   case 2:   // Return the value of the LT bit of CR6.
8437     BitNo = 2; InvertBit = false;
8438     break;
8439   case 3:   // Return the inverted value of the LT bit of CR6.
8440     BitNo = 2; InvertBit = true;
8441     break;
8442   }
8443 
8444   // Shift the bit into the low position.
8445   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
8446                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
8447   // Isolate the bit.
8448   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
8449                       DAG.getConstant(1, dl, MVT::i32));
8450 
8451   // If we are supposed to, toggle the bit.
8452   if (InvertBit)
8453     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
8454                         DAG.getConstant(1, dl, MVT::i32));
8455   return Flags;
8456 }
8457 
8458 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
8459                                                SelectionDAG &DAG) const {
8460   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
8461   // the beginning of the argument list.
8462   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
8463   SDLoc DL(Op);
8464   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
8465   case Intrinsic::ppc_cfence: {
8466     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
8467     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
8468     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
8469                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
8470                                                   Op.getOperand(ArgStart + 1)),
8471                                       Op.getOperand(0)),
8472                    0);
8473   }
8474   default:
8475     break;
8476   }
8477   return SDValue();
8478 }
8479 
8480 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
8481   // Check for a DIV with the same operands as this REM.
8482   for (auto UI : Op.getOperand(1)->uses()) {
8483     if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
8484         (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
8485       if (UI->getOperand(0) == Op.getOperand(0) &&
8486           UI->getOperand(1) == Op.getOperand(1))
8487         return SDValue();
8488   }
8489   return Op;
8490 }
8491 
8492 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
8493                                                   SelectionDAG &DAG) const {
8494   SDLoc dl(Op);
8495   // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
8496   // instructions), but for smaller types, we need to first extend up to v2i32
8497   // before doing going farther.
8498   if (Op.getValueType() == MVT::v2i64) {
8499     EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
8500     if (ExtVT != MVT::v2i32) {
8501       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
8502       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
8503                        DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
8504                                         ExtVT.getVectorElementType(), 4)));
8505       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
8506       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
8507                        DAG.getValueType(MVT::v2i32));
8508     }
8509 
8510     return Op;
8511   }
8512 
8513   return SDValue();
8514 }
8515 
8516 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
8517                                                  SelectionDAG &DAG) const {
8518   SDLoc dl(Op);
8519   // Create a stack slot that is 16-byte aligned.
8520   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8521   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8522   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8523   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8524 
8525   // Store the input value into Value#0 of the stack slot.
8526   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
8527                                MachinePointerInfo());
8528   // Load it out.
8529   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
8530 }
8531 
8532 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8533                                                   SelectionDAG &DAG) const {
8534   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
8535          "Should only be called for ISD::INSERT_VECTOR_ELT");
8536   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
8537   // We have legal lowering for constant indices but not for variable ones.
8538   if (C)
8539     return Op;
8540   return SDValue();
8541 }
8542 
// Lower EXTRACT_VECTOR_ELT for the QPX v4i1 boolean-vector type. The vector
// is converted to integers through a stack slot and the requested 4-byte lane
// is loaded back.
// NOTE(review): this assumes the index operand is a ConstantSDNode (see the
// cast below) — confirm callers never reach here with a variable index.
SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  // Spill the converted vector to a fresh 16-byte stack slot so that a single
  // 32-bit lane can be reloaded below.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested. Each lane occupies 4 bytes in the slot.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal =
      DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));

  // Without CR-bit registers the caller expects the i32 result directly;
  // with them, truncate down to the i1 the extract was typed as.
  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}
8601 
/// Lowering for QPX vector loads: misaligned v4f64/v4f32 loads are split into
/// four scalar element loads, and v4i1 loads are assembled from four byte
/// loads followed by a BUILD_VECTOR.
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    // Otherwise, scalarize: load each element individually with whatever
    // alignment its byte offset from the base guarantees.
    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      // Use an extending load when the in-memory element type is narrower
      // than the in-register element type.
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      // For a pre-increment load, fold the pointer update into the first
      // element's load only; its value #1 (updated pointer) is returned in
      // the merged results below.
      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    // Indexed loads produce (value, updated pointer, chain).
    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    // Each lane is one byte in memory; extend to i32 for BUILD_VECTOR.
    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}
8689 
/// Lowering for QPX vector stores: misaligned v4f64/v4f32 stores are split
/// into four scalar element stores, and v4i1 stores are converted to integers
/// through a stack slot and written out one byte per lane.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    // Otherwise, scalarize: extract and store each element individually with
    // whatever alignment its byte offset from the base guarantees.
    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      // Use a truncating store when the in-memory element type is narrower
      // than the in-register element type.
      if (ScalarVT != ScalarMemVT)
        Store =
            DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                              SN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
      else
        Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
                             SN->getPointerInfo().getWithOffset(Idx * Stride),
                             MinAlign(Alignment, Idx * Stride),
                             SN->getMemOperand()->getFlags(), SN->getAAInfo());

      // For a pre-increment store, fold the pointer update into the first
      // element's store only; its result (the updated pointer) is returned
      // in the merged results below.
      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Indexed stores produce (chain, updated pointer).
    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  // Spill the converted vector to a fresh 16-byte stack slot so the four
  // 32-bit lanes can be reloaded individually below.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array: reload each 32-bit lane from the slot.
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                           PtrInfo.getWithOffset(Offset));
    LoadChains[i] = Loads[i].getValue(1);
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  // Truncate each lane to a byte and store it to the destination.
  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
        SN->getAAInfo());
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}
8816 
// Lower integer vector multiplies that have no single-instruction form by
// composing AltiVec partial-product intrinsics (v4i32, v8i16, v16i8).
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    // vrlw/vslw take the shift amount modulo 32, so a splat of -16 acts
    // as a shift/rotate by +16.
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, dl, MVT::v4i32);

    // Cross products (lo*hi + hi*lo) accumulated via vmsumuhm with a zero
    // addend; these form the upper halfword contributions.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    // vmladduhm computes LHS*RHS+addend per halfword; with a zero addend it
    // is exactly a v8i16 multiply.
    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
8887 
8888 /// LowerOperation - Provide custom lowering hooks for some operations.
8889 ///
8890 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8891   switch (Op.getOpcode()) {
8892   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
8893   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
8894   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
8895   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
8896   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
8897   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
8898   case ISD::SETCC:              return LowerSETCC(Op, DAG);
8899   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
8900   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
8901   case ISD::VASTART:
8902     return LowerVASTART(Op, DAG);
8903 
8904   case ISD::VAARG:
8905     return LowerVAARG(Op, DAG);
8906 
8907   case ISD::VACOPY:
8908     return LowerVACOPY(Op, DAG);
8909 
8910   case ISD::STACKRESTORE:
8911     return LowerSTACKRESTORE(Op, DAG);
8912 
8913   case ISD::DYNAMIC_STACKALLOC:
8914     return LowerDYNAMIC_STACKALLOC(Op, DAG);
8915 
8916   case ISD::GET_DYNAMIC_AREA_OFFSET:
8917     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
8918 
8919   case ISD::EH_DWARF_CFA:
8920     return LowerEH_DWARF_CFA(Op, DAG);
8921 
8922   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
8923   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
8924 
8925   case ISD::LOAD:               return LowerLOAD(Op, DAG);
8926   case ISD::STORE:              return LowerSTORE(Op, DAG);
8927   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
8928   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
8929   case ISD::FP_TO_UINT:
8930   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
8931                                                       SDLoc(Op));
8932   case ISD::UINT_TO_FP:
8933   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
8934   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
8935 
8936   // Lower 64-bit shifts.
8937   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
8938   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
8939   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
8940 
8941   // Vector-related lowering.
8942   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
8943   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
8944   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
8945   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
8946   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
8947   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
8948   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
8949   case ISD::MUL:                return LowerMUL(Op, DAG);
8950 
8951   // For counter-based loop handling.
8952   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
8953 
8954   // Frame & Return address.
8955   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
8956   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
8957 
8958   case ISD::INTRINSIC_VOID:
8959     return LowerINTRINSIC_VOID(Op, DAG);
8960   case ISD::SREM:
8961   case ISD::UREM:
8962     return LowerREM(Op, DAG);
8963   }
8964 }
8965 
// Custom type legalization: replace the results of nodes whose result types
// are illegal with values of legal types (see LowerOperationWrapper /
// ReplaceNodeResults contract in TargetLowering).
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    // Expand to READ_TIME_BASE, which yields the timebase as two i32 halves
    // plus a chain.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Only the CTR-decrement intrinsic needs help here; everything else is
    // left for default legalization.
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    // Re-issue the intrinsic with the legal setcc result type in place of i1.
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(NewInt);
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    // Only 32-bit SVR4 needs the custom i64 va_arg expansion.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      // NOTE(review): result number 1 is passed here (SDValue(N, 1)) rather
      // than 0 — confirm this is intentional given LowerVAARG's use of it.
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_ROUND_INREG: {
    assert(N->getValueType(0) == MVT::ppcf128);
    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
    // Split the ppcf128 into its two f64 halves.
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(0, dl));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(1, dl));

    // Add the two halves of the long double in round-to-zero mode.
    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

    // We know the low half is about to be thrown away, so just use something
    // convenient.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                FPreg, FPreg));
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  }
}
9041 
9042 //===----------------------------------------------------------------------===//
9043 //  Other Lowering Code
9044 //===----------------------------------------------------------------------===//
9045 
9046 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
9047   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
9048   Function *Func = Intrinsic::getDeclaration(M, Id);
9049   return Builder.CreateCall(Func, {});
9050 }
9051 
9052 // The mappings for emitLeading/TrailingFence is taken from
9053 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
9054 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
9055                                                  Instruction *Inst,
9056                                                  AtomicOrdering Ord) const {
9057   if (Ord == AtomicOrdering::SequentiallyConsistent)
9058     return callIntrinsic(Builder, Intrinsic::ppc_sync);
9059   if (isReleaseOrStronger(Ord))
9060     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9061   return nullptr;
9062 }
9063 
9064 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
9065                                                   Instruction *Inst,
9066                                                   AtomicOrdering Ord) const {
9067   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
9068     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
9069     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
9070     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
9071     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
9072       return Builder.CreateCall(
9073           Intrinsic::getDeclaration(
9074               Builder.GetInsertBlock()->getParent()->getParent(),
9075               Intrinsic::ppc_cfence, {Inst->getType()}),
9076           {Inst});
9077     // FIXME: Can use isync for rmw operation.
9078     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9079   }
9080   return nullptr;
9081 }
9082 
9083 MachineBasicBlock *
9084 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
9085                                     unsigned AtomicSize,
9086                                     unsigned BinOpcode,
9087                                     unsigned CmpOpcode,
9088                                     unsigned CmpPred) const {
9089   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
9090   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9091 
9092   auto LoadMnemonic = PPC::LDARX;
9093   auto StoreMnemonic = PPC::STDCX;
9094   switch (AtomicSize) {
9095   default:
9096     llvm_unreachable("Unexpected size of atomic entity");
9097   case 1:
9098     LoadMnemonic = PPC::LBARX;
9099     StoreMnemonic = PPC::STBCX;
9100     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
9101     break;
9102   case 2:
9103     LoadMnemonic = PPC::LHARX;
9104     StoreMnemonic = PPC::STHCX;
9105     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
9106     break;
9107   case 4:
9108     LoadMnemonic = PPC::LWARX;
9109     StoreMnemonic = PPC::STWCX;
9110     break;
9111   case 8:
9112     LoadMnemonic = PPC::LDARX;
9113     StoreMnemonic = PPC::STDCX;
9114     break;
9115   }
9116 
9117   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9118   MachineFunction *F = BB->getParent();
9119   MachineFunction::iterator It = ++BB->getIterator();
9120 
9121   unsigned dest = MI.getOperand(0).getReg();
9122   unsigned ptrA = MI.getOperand(1).getReg();
9123   unsigned ptrB = MI.getOperand(2).getReg();
9124   unsigned incr = MI.getOperand(3).getReg();
9125   DebugLoc dl = MI.getDebugLoc();
9126 
9127   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
9128   MachineBasicBlock *loop2MBB =
9129     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9130   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9131   F->insert(It, loopMBB);
9132   if (CmpOpcode)
9133     F->insert(It, loop2MBB);
9134   F->insert(It, exitMBB);
9135   exitMBB->splice(exitMBB->begin(), BB,
9136                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
9137   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9138 
9139   MachineRegisterInfo &RegInfo = F->getRegInfo();
9140   unsigned TmpReg = (!BinOpcode) ? incr :
9141     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
9142                                            : &PPC::GPRCRegClass);
9143 
9144   //  thisMBB:
9145   //   ...
9146   //   fallthrough --> loopMBB
9147   BB->addSuccessor(loopMBB);
9148 
9149   //  loopMBB:
9150   //   l[wd]arx dest, ptr
9151   //   add r0, dest, incr
9152   //   st[wd]cx. r0, ptr
9153   //   bne- loopMBB
9154   //   fallthrough --> exitMBB
9155 
9156   // For max/min...
9157   //  loopMBB:
9158   //   l[wd]arx dest, ptr
9159   //   cmpl?[wd] incr, dest
9160   //   bgt exitMBB
9161   //  loop2MBB:
9162   //   st[wd]cx. dest, ptr
9163   //   bne- loopMBB
9164   //   fallthrough --> exitMBB
9165 
9166   BB = loopMBB;
9167   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9168     .addReg(ptrA).addReg(ptrB);
9169   if (BinOpcode)
9170     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
9171   if (CmpOpcode) {
9172     // Signed comparisons of byte or halfword values must be sign-extended.
9173     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
9174       unsigned ExtReg =  RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9175       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
9176               ExtReg).addReg(dest);
9177       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9178         .addReg(incr).addReg(ExtReg);
9179     } else
9180       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9181         .addReg(incr).addReg(dest);
9182 
9183     BuildMI(BB, dl, TII->get(PPC::BCC))
9184       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
9185     BB->addSuccessor(loop2MBB);
9186     BB->addSuccessor(exitMBB);
9187     BB = loop2MBB;
9188   }
9189   BuildMI(BB, dl, TII->get(StoreMnemonic))
9190     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
9191   BuildMI(BB, dl, TII->get(PPC::BCC))
9192     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
9193   BB->addSuccessor(loopMBB);
9194   BB->addSuccessor(exitMBB);
9195 
9196   //  exitMBB:
9197   //   ...
9198   BB = exitMBB;
9199   return BB;
9200 }
9201 
// Lower an 8- or 16-bit atomic read-modify-write pseudo when the target has
// no partword (lbarx/lharx) atomics: emit a lwarx/stwcx. loop on the aligned
// 32-bit word containing the value, using shift/mask bookkeeping to operate
// only on the selected byte or halfword.
//
// \param is8bit     true for a byte operation, false for a halfword.
// \param BinOpcode  the arithmetic/logical opcode applied to (incr, old);
//                   0 means ATOMIC_SWAP (the new value is stored directly).
// \param CmpOpcode  if nonzero, a compare opcode for min/max-style ops; the
//                   loop branches straight to the exit (keeping the old
//                   value) when the comparison with predicate \p CmpPred
//                   succeeds.
// \param CmpPred    branch predicate paired with \p CmpOpcode.
// \return the block in which subsequent instructions should be inserted
//         (the exit block of the emitted loop).
MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit, // operation
                                            unsigned BinOpcode,
                                            unsigned CmpOpcode,
                                            unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
                            CmpOpcode, CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  // Pseudo operands: (dest, ptrA, ptrB, incr).
  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // CFG: BB falls into loopMBB; when CmpOpcode is set, loopMBB can branch to
  // exitMBB or fall into loop2MBB (which holds the store); otherwise the
  // store lives in loopMBB itself.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // Everything after MI moves to the exit block, which inherits BB's
  // successors.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                          : &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  // On little-endian the in-word bit offset is the scaled byte offset
  // directly, so ShiftReg aliases Shift1Reg and the xori below is skipped.
  unsigned ShiftReg =
    isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  // For ATOMIC_SWAP there is no arithmetic result; the shifted incoming
  // value (Incr2Reg) is stored directly, so TmpReg aliases it.
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    // ptrA is the zero register, so the full address is already in ptrB.
    Ptr1Reg = ptrB;
  }
  // Shift1Reg = byte offset within the word, scaled to bits (offset * 8).
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  // Big-endian: flip to a shift from the most-significant end of the word.
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  // PtrReg = Ptr1Reg with the low two bits cleared (word-aligned address).
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  // Move the incoming value into position within the word.
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  // Build the in-position mask: 0xFF or 0xFFFF shifted into place.
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  // Load-reserve the containing word.
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  // Apply the binary op on the shifted operands (skipped for swap).
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  // Tmp2 = word with the target lane cleared; Tmp3 = new lane bits only.
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    unsigned SReg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
      .addReg(TmpDestReg).addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      // Signed compare: shift the old value back down and sign-extend it,
      // then compare against the original (unshifted) incoming value.
      ValueReg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
        .addReg(SReg).addReg(ShiftReg);
      unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
        .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
      .addReg(CmpReg).addReg(ValueReg);
    // If the old value should be kept, leave the loop without storing.
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  // Merge the new lane into the word and store-conditional; retry on failure.
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // Shift the previously-loaded lane back down into dest; this is inserted
  // at the head of the exit block so both loop exits produce the old value.
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}
9369 
// Expand the EH_SjLj_SetJmp32/64 pseudo into the three-block pattern shown
// below: thisMBB saves the context (TOC and base pointer) into the buffer and
// arranges the "returned via longjmp" value of 1; mainMBB captures the return
// address and produces 0; sinkMBB PHIs the two results into the destination.
// Note the jmp_buf layout here is LLVM-internal and deliberately NOT
// libc-compatible (see the block comment in the body).
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  // Memory Reference: propagate the pseudo's memory operands to each of the
  // stores we emit below.
  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();

  unsigned DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  // Slot offsets within the buffer, in units of the pointer size.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI.getOperand(1).getReg();

  // Save the TOC pointer (X2) into the fourth slot on 64-bit SVR4 so that
  // a longjmp from another shared library can restore it.
  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
            .addReg(PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  // Save the base pointer into the fifth slot.
  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg);
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  // Everything may be clobbered across the setjmp point.
  MIB.addRegMask(TRI->getNoPreservedMask());

  // Value seen when control arrives here via longjmp.
  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  // The fall-through (normal) path is the overwhelmingly likely one.
  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  // Read the return address deposited in LR by the BCLalways above.
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }

  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Value produced by a direct (non-longjmp) return from setjmp.
  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  // Merge the two possible results into the original destination register.
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
9516 
// Expand the EH_SjLj_LongJmp32/64 pseudo: reload the frame pointer, return
// address, stack pointer, base pointer, and (on 64-bit SVR4) the TOC pointer
// from the buffer written by emitEHSjLjSetJmp, then jump to the saved address
// via CTR. The slot layout must stay in sync with emitEHSjLjSetJmp above.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory Reference: propagate the pseudo's memory operands to each reload.
  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  // The base-pointer register choice mirrors PPCRegisterInfo: r29 when
  // 32-bit SVR4 PIC reserves r30 for the PIC base, r30/x30 otherwise.
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
                                                              : PPC::R30);

  MachineInstrBuilder MIB;

  // Slot offsets within the buffer, matching emitEHSjLjSetJmp (slot 0 is the
  // frame address stored by Clang before the intrinsic call).
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  unsigned BufReg = MI.getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);

    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Jump to the reloaded return address via the count register.
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}
9623 
9624 MachineBasicBlock *
9625 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9626                                                MachineBasicBlock *BB) const {
9627   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
9628       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9629     if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
9630         MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9631       // Call lowering should have added an r2 operand to indicate a dependence
9632       // on the TOC base pointer value. It can't however, because there is no
9633       // way to mark the dependence as implicit there, and so the stackmap code
9634       // will confuse it with a regular operand. Instead, add the dependence
9635       // here.
9636       setUsesTOCBasePtr(*BB->getParent());
9637       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
9638     }
9639 
9640     return emitPatchPoint(MI, BB);
9641   }
9642 
9643   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
9644       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
9645     return emitEHSjLjSetJmp(MI, BB);
9646   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
9647              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
9648     return emitEHSjLjLongJmp(MI, BB);
9649   }
9650 
9651   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9652 
9653   // To "insert" these instructions we actually have to insert their
9654   // control-flow patterns.
9655   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9656   MachineFunction::iterator It = ++BB->getIterator();
9657 
9658   MachineFunction *F = BB->getParent();
9659 
9660   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9661        MI.getOpcode() == PPC::SELECT_CC_I8 ||
9662        MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
9663     SmallVector<MachineOperand, 2> Cond;
9664     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9665         MI.getOpcode() == PPC::SELECT_CC_I8)
9666       Cond.push_back(MI.getOperand(4));
9667     else
9668       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
9669     Cond.push_back(MI.getOperand(1));
9670 
9671     DebugLoc dl = MI.getDebugLoc();
9672     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
9673                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
9674   } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9675              MI.getOpcode() == PPC::SELECT_CC_I8 ||
9676              MI.getOpcode() == PPC::SELECT_CC_F4 ||
9677              MI.getOpcode() == PPC::SELECT_CC_F8 ||
9678              MI.getOpcode() == PPC::SELECT_CC_QFRC ||
9679              MI.getOpcode() == PPC::SELECT_CC_QSRC ||
9680              MI.getOpcode() == PPC::SELECT_CC_QBRC ||
9681              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
9682              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
9683              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
9684              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
9685              MI.getOpcode() == PPC::SELECT_I4 ||
9686              MI.getOpcode() == PPC::SELECT_I8 ||
9687              MI.getOpcode() == PPC::SELECT_F4 ||
9688              MI.getOpcode() == PPC::SELECT_F8 ||
9689              MI.getOpcode() == PPC::SELECT_QFRC ||
9690              MI.getOpcode() == PPC::SELECT_QSRC ||
9691              MI.getOpcode() == PPC::SELECT_QBRC ||
9692              MI.getOpcode() == PPC::SELECT_VRRC ||
9693              MI.getOpcode() == PPC::SELECT_VSFRC ||
9694              MI.getOpcode() == PPC::SELECT_VSSRC ||
9695              MI.getOpcode() == PPC::SELECT_VSRC) {
9696     // The incoming instruction knows the destination vreg to set, the
9697     // condition code register to branch on, the true/false values to
9698     // select between, and a branch opcode to use.
9699 
9700     //  thisMBB:
9701     //  ...
9702     //   TrueVal = ...
9703     //   cmpTY ccX, r1, r2
9704     //   bCC copy1MBB
9705     //   fallthrough --> copy0MBB
9706     MachineBasicBlock *thisMBB = BB;
9707     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9708     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9709     DebugLoc dl = MI.getDebugLoc();
9710     F->insert(It, copy0MBB);
9711     F->insert(It, sinkMBB);
9712 
9713     // Transfer the remainder of BB and its successor edges to sinkMBB.
9714     sinkMBB->splice(sinkMBB->begin(), BB,
9715                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9716     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9717 
9718     // Next, add the true and fallthrough blocks as its successors.
9719     BB->addSuccessor(copy0MBB);
9720     BB->addSuccessor(sinkMBB);
9721 
9722     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
9723         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
9724         MI.getOpcode() == PPC::SELECT_QFRC ||
9725         MI.getOpcode() == PPC::SELECT_QSRC ||
9726         MI.getOpcode() == PPC::SELECT_QBRC ||
9727         MI.getOpcode() == PPC::SELECT_VRRC ||
9728         MI.getOpcode() == PPC::SELECT_VSFRC ||
9729         MI.getOpcode() == PPC::SELECT_VSSRC ||
9730         MI.getOpcode() == PPC::SELECT_VSRC) {
9731       BuildMI(BB, dl, TII->get(PPC::BC))
9732           .addReg(MI.getOperand(1).getReg())
9733           .addMBB(sinkMBB);
9734     } else {
9735       unsigned SelectPred = MI.getOperand(4).getImm();
9736       BuildMI(BB, dl, TII->get(PPC::BCC))
9737           .addImm(SelectPred)
9738           .addReg(MI.getOperand(1).getReg())
9739           .addMBB(sinkMBB);
9740     }
9741 
9742     //  copy0MBB:
9743     //   %FalseValue = ...
9744     //   # fallthrough to sinkMBB
9745     BB = copy0MBB;
9746 
9747     // Update machine-CFG edges
9748     BB->addSuccessor(sinkMBB);
9749 
9750     //  sinkMBB:
9751     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9752     //  ...
9753     BB = sinkMBB;
9754     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
9755         .addReg(MI.getOperand(3).getReg())
9756         .addMBB(copy0MBB)
9757         .addReg(MI.getOperand(2).getReg())
9758         .addMBB(thisMBB);
9759   } else if (MI.getOpcode() == PPC::ReadTB) {
9760     // To read the 64-bit time-base register on a 32-bit target, we read the
9761     // two halves. Should the counter have wrapped while it was being read, we
9762     // need to try again.
9763     // ...
9764     // readLoop:
9765     // mfspr Rx,TBU # load from TBU
9766     // mfspr Ry,TB  # load from TB
9767     // mfspr Rz,TBU # load from TBU
9768     // cmpw crX,Rx,Rz # check if 'old'='new'
9769     // bne readLoop   # branch if they're not equal
9770     // ...
9771 
9772     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
9773     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9774     DebugLoc dl = MI.getDebugLoc();
9775     F->insert(It, readMBB);
9776     F->insert(It, sinkMBB);
9777 
9778     // Transfer the remainder of BB and its successor edges to sinkMBB.
9779     sinkMBB->splice(sinkMBB->begin(), BB,
9780                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9781     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9782 
9783     BB->addSuccessor(readMBB);
9784     BB = readMBB;
9785 
9786     MachineRegisterInfo &RegInfo = F->getRegInfo();
9787     unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9788     unsigned LoReg = MI.getOperand(0).getReg();
9789     unsigned HiReg = MI.getOperand(1).getReg();
9790 
9791     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
9792     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
9793     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
9794 
9795     unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9796 
9797     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
9798       .addReg(HiReg).addReg(ReadAgainReg);
9799     BuildMI(BB, dl, TII->get(PPC::BCC))
9800       .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
9801 
9802     BB->addSuccessor(readMBB);
9803     BB->addSuccessor(sinkMBB);
9804   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9805     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9806   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9807     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9808   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9809     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9810   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9811     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9812 
9813   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9814     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9815   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9816     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9817   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9818     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9819   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9820     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9821 
9822   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9823     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9824   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9825     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9826   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9827     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9828   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9829     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9830 
9831   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9832     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9833   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9834     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9835   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9836     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9837   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9838     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9839 
9840   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9841     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9842   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9843     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9844   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9845     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9846   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9847     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9848 
9849   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9850     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9851   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9852     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9853   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9854     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9855   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9856     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9857 
9858   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
9859     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
9860   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
9861     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
9862   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
9863     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
9864   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
9865     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
9866 
9867   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
9868     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
9869   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
9870     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
9871   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
9872     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
9873   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
9874     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
9875 
9876   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
9877     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
9878   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
9879     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
9880   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
9881     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
9882   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
9883     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
9884 
9885   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
9886     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
9887   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
9888     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
9889   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
9890     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
9891   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
9892     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
9893 
9894   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9895     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9896   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9897     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9898   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9899     BB = EmitAtomicBinary(MI, BB, 4, 0);
9900   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9901     BB = EmitAtomicBinary(MI, BB, 8, 0);
9902   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9903            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9904            (Subtarget.hasPartwordAtomics() &&
9905             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9906            (Subtarget.hasPartwordAtomics() &&
9907             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9908     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
9909 
9910     auto LoadMnemonic = PPC::LDARX;
9911     auto StoreMnemonic = PPC::STDCX;
9912     switch (MI.getOpcode()) {
9913     default:
9914       llvm_unreachable("Compare and swap of unknown size");
9915     case PPC::ATOMIC_CMP_SWAP_I8:
9916       LoadMnemonic = PPC::LBARX;
9917       StoreMnemonic = PPC::STBCX;
9918       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
9919       break;
9920     case PPC::ATOMIC_CMP_SWAP_I16:
9921       LoadMnemonic = PPC::LHARX;
9922       StoreMnemonic = PPC::STHCX;
9923       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
9924       break;
9925     case PPC::ATOMIC_CMP_SWAP_I32:
9926       LoadMnemonic = PPC::LWARX;
9927       StoreMnemonic = PPC::STWCX;
9928       break;
9929     case PPC::ATOMIC_CMP_SWAP_I64:
9930       LoadMnemonic = PPC::LDARX;
9931       StoreMnemonic = PPC::STDCX;
9932       break;
9933     }
9934     unsigned dest = MI.getOperand(0).getReg();
9935     unsigned ptrA = MI.getOperand(1).getReg();
9936     unsigned ptrB = MI.getOperand(2).getReg();
9937     unsigned oldval = MI.getOperand(3).getReg();
9938     unsigned newval = MI.getOperand(4).getReg();
9939     DebugLoc dl = MI.getDebugLoc();
9940 
9941     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9942     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9943     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9944     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9945     F->insert(It, loop1MBB);
9946     F->insert(It, loop2MBB);
9947     F->insert(It, midMBB);
9948     F->insert(It, exitMBB);
9949     exitMBB->splice(exitMBB->begin(), BB,
9950                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9951     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9952 
9953     //  thisMBB:
9954     //   ...
9955     //   fallthrough --> loopMBB
9956     BB->addSuccessor(loop1MBB);
9957 
9958     // loop1MBB:
9959     //   l[bhwd]arx dest, ptr
9960     //   cmp[wd] dest, oldval
9961     //   bne- midMBB
9962     // loop2MBB:
9963     //   st[bhwd]cx. newval, ptr
9964     //   bne- loopMBB
9965     //   b exitBB
9966     // midMBB:
9967     //   st[bhwd]cx. dest, ptr
9968     // exitBB:
9969     BB = loop1MBB;
9970     BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9971       .addReg(ptrA).addReg(ptrB);
9972     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
9973       .addReg(oldval).addReg(dest);
9974     BuildMI(BB, dl, TII->get(PPC::BCC))
9975       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9976     BB->addSuccessor(loop2MBB);
9977     BB->addSuccessor(midMBB);
9978 
9979     BB = loop2MBB;
9980     BuildMI(BB, dl, TII->get(StoreMnemonic))
9981       .addReg(newval).addReg(ptrA).addReg(ptrB);
9982     BuildMI(BB, dl, TII->get(PPC::BCC))
9983       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9984     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9985     BB->addSuccessor(loop1MBB);
9986     BB->addSuccessor(exitMBB);
9987 
9988     BB = midMBB;
9989     BuildMI(BB, dl, TII->get(StoreMnemonic))
9990       .addReg(dest).addReg(ptrA).addReg(ptrB);
9991     BB->addSuccessor(exitMBB);
9992 
9993     //  exitMBB:
9994     //   ...
9995     BB = exitMBB;
9996   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
9997              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
9998     // We must use 64-bit registers for addresses when targeting 64-bit,
9999     // since we're actually doing arithmetic on them.  Other registers
10000     // can be 32-bit.
10001     bool is64bit = Subtarget.isPPC64();
10002     bool isLittleEndian = Subtarget.isLittleEndian();
10003     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
10004 
10005     unsigned dest = MI.getOperand(0).getReg();
10006     unsigned ptrA = MI.getOperand(1).getReg();
10007     unsigned ptrB = MI.getOperand(2).getReg();
10008     unsigned oldval = MI.getOperand(3).getReg();
10009     unsigned newval = MI.getOperand(4).getReg();
10010     DebugLoc dl = MI.getDebugLoc();
10011 
10012     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10013     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10014     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10015     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10016     F->insert(It, loop1MBB);
10017     F->insert(It, loop2MBB);
10018     F->insert(It, midMBB);
10019     F->insert(It, exitMBB);
10020     exitMBB->splice(exitMBB->begin(), BB,
10021                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
10022     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10023 
10024     MachineRegisterInfo &RegInfo = F->getRegInfo();
10025     const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
10026                                             : &PPC::GPRCRegClass;
10027     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
10028     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
10029     unsigned ShiftReg =
10030       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
10031     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
10032     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
10033     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
10034     unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
10035     unsigned MaskReg = RegInfo.createVirtualRegister(RC);
10036     unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
10037     unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
10038     unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
10039     unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
10040     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
10041     unsigned Ptr1Reg;
10042     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
10043     unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10044     //  thisMBB:
10045     //   ...
10046     //   fallthrough --> loopMBB
10047     BB->addSuccessor(loop1MBB);
10048 
10049     // The 4-byte load must be aligned, while a char or short may be
10050     // anywhere in the word.  Hence all this nasty bookkeeping code.
10051     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
10052     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10053     //   xori shift, shift1, 24 [16]
10054     //   rlwinm ptr, ptr1, 0, 0, 29
10055     //   slw newval2, newval, shift
10056     //   slw oldval2, oldval,shift
10057     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10058     //   slw mask, mask2, shift
10059     //   and newval3, newval2, mask
10060     //   and oldval3, oldval2, mask
10061     // loop1MBB:
10062     //   lwarx tmpDest, ptr
10063     //   and tmp, tmpDest, mask
10064     //   cmpw tmp, oldval3
10065     //   bne- midMBB
10066     // loop2MBB:
10067     //   andc tmp2, tmpDest, mask
10068     //   or tmp4, tmp2, newval3
10069     //   stwcx. tmp4, ptr
10070     //   bne- loop1MBB
10071     //   b exitBB
10072     // midMBB:
10073     //   stwcx. tmpDest, ptr
10074     // exitBB:
10075     //   srw dest, tmpDest, shift
10076     if (ptrA != ZeroReg) {
10077       Ptr1Reg = RegInfo.createVirtualRegister(RC);
10078       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10079         .addReg(ptrA).addReg(ptrB);
10080     } else {
10081       Ptr1Reg = ptrB;
10082     }
10083     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
10084         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
10085     if (!isLittleEndian)
10086       BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
10087           .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
10088     if (is64bit)
10089       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10090         .addReg(Ptr1Reg).addImm(0).addImm(61);
10091     else
10092       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10093         .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
10094     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
10095         .addReg(newval).addReg(ShiftReg);
10096     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
10097         .addReg(oldval).addReg(ShiftReg);
10098     if (is8bit)
10099       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10100     else {
10101       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10102       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10103         .addReg(Mask3Reg).addImm(65535);
10104     }
10105     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10106         .addReg(Mask2Reg).addReg(ShiftReg);
10107     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
10108         .addReg(NewVal2Reg).addReg(MaskReg);
10109     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
10110         .addReg(OldVal2Reg).addReg(MaskReg);
10111 
10112     BB = loop1MBB;
10113     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10114         .addReg(ZeroReg).addReg(PtrReg);
10115     BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
10116         .addReg(TmpDestReg).addReg(MaskReg);
10117     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
10118         .addReg(TmpReg).addReg(OldVal3Reg);
10119     BuildMI(BB, dl, TII->get(PPC::BCC))
10120         .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
10121     BB->addSuccessor(loop2MBB);
10122     BB->addSuccessor(midMBB);
10123 
10124     BB = loop2MBB;
10125     BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
10126         .addReg(TmpDestReg).addReg(MaskReg);
10127     BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
10128         .addReg(Tmp2Reg).addReg(NewVal3Reg);
10129     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
10130         .addReg(ZeroReg).addReg(PtrReg);
10131     BuildMI(BB, dl, TII->get(PPC::BCC))
10132       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
10133     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
10134     BB->addSuccessor(loop1MBB);
10135     BB->addSuccessor(exitMBB);
10136 
10137     BB = midMBB;
10138     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
10139       .addReg(ZeroReg).addReg(PtrReg);
10140     BB->addSuccessor(exitMBB);
10141 
10142     //  exitMBB:
10143     //   ...
10144     BB = exitMBB;
10145     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
10146       .addReg(ShiftReg);
10147   } else if (MI.getOpcode() == PPC::FADDrtz) {
10148     // This pseudo performs an FADD with rounding mode temporarily forced
10149     // to round-to-zero.  We emit this via custom inserter since the FPSCR
10150     // is not modeled at the SelectionDAG level.
10151     unsigned Dest = MI.getOperand(0).getReg();
10152     unsigned Src1 = MI.getOperand(1).getReg();
10153     unsigned Src2 = MI.getOperand(2).getReg();
10154     DebugLoc dl = MI.getDebugLoc();
10155 
10156     MachineRegisterInfo &RegInfo = F->getRegInfo();
10157     unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
10158 
10159     // Save FPSCR value.
10160     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
10161 
10162     // Set rounding mode to round-to-zero.
10163     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
10164     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
10165 
10166     // Perform addition.
10167     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
10168 
10169     // Restore FPSCR value.
10170     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
10171   } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10172              MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
10173              MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10174              MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
10175     unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10176                        MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
10177                           ? PPC::ANDIo8
10178                           : PPC::ANDIo;
10179     bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10180                  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
10181 
10182     MachineRegisterInfo &RegInfo = F->getRegInfo();
10183     unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
10184                                                   &PPC::GPRCRegClass :
10185                                                   &PPC::G8RCRegClass);
10186 
10187     DebugLoc dl = MI.getDebugLoc();
10188     BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
10189         .addReg(MI.getOperand(1).getReg())
10190         .addImm(1);
10191     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
10192             MI.getOperand(0).getReg())
10193         .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
10194   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
10195     DebugLoc Dl = MI.getDebugLoc();
10196     MachineRegisterInfo &RegInfo = F->getRegInfo();
10197     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10198     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
10199     return BB;
10200   } else {
10201     llvm_unreachable("Unexpected instr type to insert");
10202   }
10203 
10204   MI.eraseFromParent(); // The pseudo instruction is gone now.
10205   return BB;
10206 }
10207 
10208 //===----------------------------------------------------------------------===//
10209 // Target Optimization Hooks
10210 //===----------------------------------------------------------------------===//
10211 
10212 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
10213   // For the estimates, convergence is quadratic, so we essentially double the
10214   // number of digits correct after every iteration. For both FRE and FRSQRTE,
10215   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
10216   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
10217   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
10218   if (VT.getScalarType() == MVT::f64)
10219     RefinementSteps++;
10220   return RefinementSteps;
10221 }
10222 
10223 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
10224                                            int Enabled, int &RefinementSteps,
10225                                            bool &UseOneConstNR,
10226                                            bool Reciprocal) const {
10227   EVT VT = Operand.getValueType();
10228   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
10229       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
10230       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10231       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10232       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10233       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10234     if (RefinementSteps == ReciprocalEstimate::Unspecified)
10235       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10236 
10237     UseOneConstNR = true;
10238     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
10239   }
10240   return SDValue();
10241 }
10242 
10243 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
10244                                             int Enabled,
10245                                             int &RefinementSteps) const {
10246   EVT VT = Operand.getValueType();
10247   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
10248       (VT == MVT::f64 && Subtarget.hasFRE()) ||
10249       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10250       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10251       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10252       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10253     if (RefinementSteps == ReciprocalEstimate::Unspecified)
10254       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10255     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
10256   }
10257   return SDValue();
10258 }
10259 
10260 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
10261   // Note: This functionality is used only when unsafe-fp-math is enabled, and
10262   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
10263   // enabled for division), this functionality is redundant with the default
10264   // combiner logic (once the division -> reciprocal/multiply transformation
10265   // has taken place). As a result, this matters more for older cores than for
10266   // newer ones.
10267 
10268   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
10269   // reciprocal if there are two or more FDIVs (for embedded cores with only
10270   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
10271   switch (Subtarget.getDarwinDirective()) {
10272   default:
10273     return 3;
10274   case PPC::DIR_440:
10275   case PPC::DIR_A2:
10276   case PPC::DIR_E500mc:
10277   case PPC::DIR_E5500:
10278     return 2;
10279   }
10280 }
10281 
10282 // isConsecutiveLSLoc needs to work even if all adds have not yet been
10283 // collapsed, and so we need to look through chains of them.
10284 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
10285                                      int64_t& Offset, SelectionDAG &DAG) {
10286   if (DAG.isBaseWithConstantOffset(Loc)) {
10287     Base = Loc.getOperand(0);
10288     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
10289 
10290     // The base might itself be a base plus an offset, and if so, accumulate
10291     // that as well.
10292     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
10293   }
10294 }
10295 
10296 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
10297                             unsigned Bytes, int Dist,
10298                             SelectionDAG &DAG) {
10299   if (VT.getSizeInBits() / 8 != Bytes)
10300     return false;
10301 
10302   SDValue BaseLoc = Base->getBasePtr();
10303   if (Loc.getOpcode() == ISD::FrameIndex) {
10304     if (BaseLoc.getOpcode() != ISD::FrameIndex)
10305       return false;
10306     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10307     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
10308     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
10309     int FS  = MFI.getObjectSize(FI);
10310     int BFS = MFI.getObjectSize(BFI);
10311     if (FS != BFS || FS != (int)Bytes) return false;
10312     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
10313   }
10314 
10315   SDValue Base1 = Loc, Base2 = BaseLoc;
10316   int64_t Offset1 = 0, Offset2 = 0;
10317   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
10318   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
10319   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
10320     return true;
10321 
10322   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10323   const GlobalValue *GV1 = nullptr;
10324   const GlobalValue *GV2 = nullptr;
10325   Offset1 = 0;
10326   Offset2 = 0;
10327   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
10328   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
10329   if (isGA1 && isGA2 && GV1 == GV2)
10330     return Offset1 == (Offset2 + Dist*Bytes);
10331   return false;
10332 }
10333 
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  // Ordinary loads/stores: take the memory VT and base pointer directly.
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  // Load intrinsics (QPX, Altivec, VSX): recover the accessed VT from the
  // intrinsic ID (operand 1); the address is operand 2.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false; // Not a recognized memory intrinsic.
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  // Store intrinsics: same idea, but the value being stored is operand 2,
  // so the address is operand 3.
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false; // Not a recognized memory intrinsic.
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  // Anything else is not a memory access we understand.
  return false;
}
10443 
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
10449 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
10450   SDValue Chain = LD->getChain();
10451   EVT VT = LD->getMemoryVT();
10452 
10453   SmallSet<SDNode *, 16> LoadRoots;
10454   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
10455   SmallSet<SDNode *, 16> Visited;
10456 
10457   // First, search up the chain, branching to follow all token-factor operands.
10458   // If we find a consecutive load, then we're done, otherwise, record all
10459   // nodes just above the top-level loads and token factors.
10460   while (!Queue.empty()) {
10461     SDNode *ChainNext = Queue.pop_back_val();
10462     if (!Visited.insert(ChainNext).second)
10463       continue;
10464 
10465     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
10466       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10467         return true;
10468 
10469       if (!Visited.count(ChainLD->getChain().getNode()))
10470         Queue.push_back(ChainLD->getChain().getNode());
10471     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
10472       for (const SDUse &O : ChainNext->ops())
10473         if (!Visited.count(O.getNode()))
10474           Queue.push_back(O.getNode());
10475     } else
10476       LoadRoots.insert(ChainNext);
10477   }
10478 
10479   // Second, search down the chain, starting from the top-level nodes recorded
10480   // in the first phase. These top-level nodes are the nodes just above all
10481   // loads and token factors. Starting with their uses, recursively look though
10482   // all loads (just the chain uses) and token factors to find a consecutive
10483   // load.
10484   Visited.clear();
10485   Queue.clear();
10486 
10487   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
10488        IE = LoadRoots.end(); I != IE; ++I) {
10489     Queue.push_back(*I);
10490 
10491     while (!Queue.empty()) {
10492       SDNode *LoadRoot = Queue.pop_back_val();
10493       if (!Visited.insert(LoadRoot).second)
10494         continue;
10495 
10496       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
10497         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10498           return true;
10499 
10500       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
10501            UE = LoadRoot->use_end(); UI != UE; ++UI)
10502         if (((isa<MemSDNode>(*UI) &&
10503             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
10504             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
10505           Queue.push_back(*UI);
10506     }
10507   }
10508 
10509   return false;
10510 }
10511 
10512 /// This function is called when we have proved that a SETCC node can be replaced
10513 /// by subtraction (and other supporting instructions) so that the result of
10514 /// comparison is kept in a GPR instead of CR. This function is purely for
10515 /// codegen purposes and has some flags to guide the codegen process.
10516 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
10517                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
10518   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10519 
10520   // Zero extend the operands to the largest legal integer. Originally, they
10521   // must be of a strictly smaller size.
10522   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
10523                          DAG.getConstant(Size, DL, MVT::i32));
10524   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
10525                          DAG.getConstant(Size, DL, MVT::i32));
10526 
10527   // Swap if needed. Depends on the condition code.
10528   if (Swap)
10529     std::swap(Op0, Op1);
10530 
10531   // Subtract extended integers.
10532   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
10533 
10534   // Move the sign bit to the least significant position and zero out the rest.
10535   // Now the least significant bit carries the result of original comparison.
10536   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
10537                              DAG.getConstant(Size - 1, DL, MVT::i32));
10538   auto Final = Shifted;
10539 
10540   // Complement the result if needed. Based on the condition code.
10541   if (Complement)
10542     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
10543                         DAG.getConstant(1, DL, MVT::i64));
10544 
10545   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
10546 }
10547 
10548 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
10549                                                   DAGCombinerInfo &DCI) const {
10550   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10551 
10552   SelectionDAG &DAG = DCI.DAG;
10553   SDLoc DL(N);
10554 
10555   // Size of integers being compared has a critical role in the following
10556   // analysis, so we prefer to do this when all types are legal.
10557   if (!DCI.isAfterLegalizeVectorOps())
10558     return SDValue();
10559 
10560   // If all users of SETCC extend its value to a legal integer type
10561   // then we replace SETCC with a subtraction
10562   for (SDNode::use_iterator UI = N->use_begin(),
10563        UE = N->use_end(); UI != UE; ++UI) {
10564     if (UI->getOpcode() != ISD::ZERO_EXTEND)
10565       return SDValue();
10566   }
10567 
10568   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
10569   auto OpSize = N->getOperand(0).getValueSizeInBits();
10570 
10571   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
10572 
10573   if (OpSize < Size) {
10574     switch (CC) {
10575     default: break;
10576     case ISD::SETULT:
10577       return generateEquivalentSub(N, Size, false, false, DL, DAG);
10578     case ISD::SETULE:
10579       return generateEquivalentSub(N, Size, true, true, DL, DAG);
10580     case ISD::SETUGT:
10581       return generateEquivalentSub(N, Size, false, true, DL, DAG);
10582     case ISD::SETUGE:
10583       return generateEquivalentSub(N, Size, true, false, DL, DAG);
10584     }
10585   }
10586 
10587   return SDValue();
10588 }
10589 
10590 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
10591                                                   DAGCombinerInfo &DCI) const {
10592   SelectionDAG &DAG = DCI.DAG;
10593   SDLoc dl(N);
10594 
10595   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
10596   // If we're tracking CR bits, we need to be careful that we don't have:
10597   //   trunc(binary-ops(zext(x), zext(y)))
10598   // or
10599   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
10600   // such that we're unnecessarily moving things into GPRs when it would be
10601   // better to keep them in CR bits.
10602 
10603   // Note that trunc here can be an actual i1 trunc, or can be the effective
10604   // truncation that comes from a setcc or select_cc.
10605   if (N->getOpcode() == ISD::TRUNCATE &&
10606       N->getValueType(0) != MVT::i1)
10607     return SDValue();
10608 
10609   if (N->getOperand(0).getValueType() != MVT::i32 &&
10610       N->getOperand(0).getValueType() != MVT::i64)
10611     return SDValue();
10612 
10613   if (N->getOpcode() == ISD::SETCC ||
10614       N->getOpcode() == ISD::SELECT_CC) {
10615     // If we're looking at a comparison, then we need to make sure that the
10616     // high bits (all except for the first) don't matter the result.
10617     ISD::CondCode CC =
10618       cast<CondCodeSDNode>(N->getOperand(
10619         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
10620     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
10621 
10622     if (ISD::isSignedIntSetCC(CC)) {
10623       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
10624           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
10625         return SDValue();
10626     } else if (ISD::isUnsignedIntSetCC(CC)) {
10627       if (!DAG.MaskedValueIsZero(N->getOperand(0),
10628                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
10629           !DAG.MaskedValueIsZero(N->getOperand(1),
10630                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
10631         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
10632                                              : SDValue());
10633     } else {
10634       // This is neither a signed nor an unsigned comparison, just make sure
10635       // that the high bits are equal.
10636       KnownBits Op1Known, Op2Known;
10637       DAG.computeKnownBits(N->getOperand(0), Op1Known);
10638       DAG.computeKnownBits(N->getOperand(1), Op2Known);
10639 
10640       // We don't really care about what is known about the first bit (if
10641       // anything), so clear it in all masks prior to comparing them.
10642       Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
10643       Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
10644 
10645       if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
10646         return SDValue();
10647     }
10648   }
10649 
10650   // We now know that the higher-order bits are irrelevant, we just need to
10651   // make sure that all of the intermediate operations are bit operations, and
10652   // all inputs are extensions.
10653   if (N->getOperand(0).getOpcode() != ISD::AND &&
10654       N->getOperand(0).getOpcode() != ISD::OR  &&
10655       N->getOperand(0).getOpcode() != ISD::XOR &&
10656       N->getOperand(0).getOpcode() != ISD::SELECT &&
10657       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
10658       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
10659       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
10660       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
10661       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
10662     return SDValue();
10663 
10664   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
10665       N->getOperand(1).getOpcode() != ISD::AND &&
10666       N->getOperand(1).getOpcode() != ISD::OR  &&
10667       N->getOperand(1).getOpcode() != ISD::XOR &&
10668       N->getOperand(1).getOpcode() != ISD::SELECT &&
10669       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
10670       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
10671       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
10672       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
10673       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
10674     return SDValue();
10675 
10676   SmallVector<SDValue, 4> Inputs;
10677   SmallVector<SDValue, 8> BinOps, PromOps;
10678   SmallPtrSet<SDNode *, 16> Visited;
10679 
10680   for (unsigned i = 0; i < 2; ++i) {
10681     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10682           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10683           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10684           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10685         isa<ConstantSDNode>(N->getOperand(i)))
10686       Inputs.push_back(N->getOperand(i));
10687     else
10688       BinOps.push_back(N->getOperand(i));
10689 
10690     if (N->getOpcode() == ISD::TRUNCATE)
10691       break;
10692   }
10693 
10694   // Visit all inputs, collect all binary operations (and, or, xor and
10695   // select) that are all fed by extensions.
10696   while (!BinOps.empty()) {
10697     SDValue BinOp = BinOps.back();
10698     BinOps.pop_back();
10699 
10700     if (!Visited.insert(BinOp.getNode()).second)
10701       continue;
10702 
10703     PromOps.push_back(BinOp);
10704 
10705     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10706       // The condition of the select is not promoted.
10707       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10708         continue;
10709       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10710         continue;
10711 
10712       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10713             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10714             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10715            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10716           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10717         Inputs.push_back(BinOp.getOperand(i));
10718       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10719                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10720                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10721                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10722                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
10723                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10724                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10725                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10726                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
10727         BinOps.push_back(BinOp.getOperand(i));
10728       } else {
10729         // We have an input that is not an extension or another binary
10730         // operation; we'll abort this transformation.
10731         return SDValue();
10732       }
10733     }
10734   }
10735 
10736   // Make sure that this is a self-contained cluster of operations (which
10737   // is not quite the same thing as saying that everything has only one
10738   // use).
10739   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10740     if (isa<ConstantSDNode>(Inputs[i]))
10741       continue;
10742 
10743     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10744                               UE = Inputs[i].getNode()->use_end();
10745          UI != UE; ++UI) {
10746       SDNode *User = *UI;
10747       if (User != N && !Visited.count(User))
10748         return SDValue();
10749 
10750       // Make sure that we're not going to promote the non-output-value
10751       // operand(s) or SELECT or SELECT_CC.
10752       // FIXME: Although we could sometimes handle this, and it does occur in
10753       // practice that one of the condition inputs to the select is also one of
10754       // the outputs, we currently can't deal with this.
10755       if (User->getOpcode() == ISD::SELECT) {
10756         if (User->getOperand(0) == Inputs[i])
10757           return SDValue();
10758       } else if (User->getOpcode() == ISD::SELECT_CC) {
10759         if (User->getOperand(0) == Inputs[i] ||
10760             User->getOperand(1) == Inputs[i])
10761           return SDValue();
10762       }
10763     }
10764   }
10765 
10766   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10767     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10768                               UE = PromOps[i].getNode()->use_end();
10769          UI != UE; ++UI) {
10770       SDNode *User = *UI;
10771       if (User != N && !Visited.count(User))
10772         return SDValue();
10773 
10774       // Make sure that we're not going to promote the non-output-value
10775       // operand(s) or SELECT or SELECT_CC.
10776       // FIXME: Although we could sometimes handle this, and it does occur in
10777       // practice that one of the condition inputs to the select is also one of
10778       // the outputs, we currently can't deal with this.
10779       if (User->getOpcode() == ISD::SELECT) {
10780         if (User->getOperand(0) == PromOps[i])
10781           return SDValue();
10782       } else if (User->getOpcode() == ISD::SELECT_CC) {
10783         if (User->getOperand(0) == PromOps[i] ||
10784             User->getOperand(1) == PromOps[i])
10785           return SDValue();
10786       }
10787     }
10788   }
10789 
10790   // Replace all inputs with the extension operand.
10791   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10792     // Constants may have users outside the cluster of to-be-promoted nodes,
10793     // and so we need to replace those as we do the promotions.
10794     if (isa<ConstantSDNode>(Inputs[i]))
10795       continue;
10796     else
10797       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
10798   }
10799 
10800   std::list<HandleSDNode> PromOpHandles;
10801   for (auto &PromOp : PromOps)
10802     PromOpHandles.emplace_back(PromOp);
10803 
10804   // Replace all operations (these are all the same, but have a different
10805   // (i1) return type). DAG.getNode will validate that the types of
10806   // a binary operator match, so go through the list in reverse so that
10807   // we've likely promoted both operands first. Any intermediate truncations or
10808   // extensions disappear.
10809   while (!PromOpHandles.empty()) {
10810     SDValue PromOp = PromOpHandles.back().getValue();
10811     PromOpHandles.pop_back();
10812 
10813     if (PromOp.getOpcode() == ISD::TRUNCATE ||
10814         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
10815         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
10816         PromOp.getOpcode() == ISD::ANY_EXTEND) {
10817       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
10818           PromOp.getOperand(0).getValueType() != MVT::i1) {
10819         // The operand is not yet ready (see comment below).
10820         PromOpHandles.emplace_front(PromOp);
10821         continue;
10822       }
10823 
10824       SDValue RepValue = PromOp.getOperand(0);
10825       if (isa<ConstantSDNode>(RepValue))
10826         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
10827 
10828       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
10829       continue;
10830     }
10831 
10832     unsigned C;
10833     switch (PromOp.getOpcode()) {
10834     default:             C = 0; break;
10835     case ISD::SELECT:    C = 1; break;
10836     case ISD::SELECT_CC: C = 2; break;
10837     }
10838 
10839     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10840          PromOp.getOperand(C).getValueType() != MVT::i1) ||
10841         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10842          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
10843       // The to-be-promoted operands of this node have not yet been
10844       // promoted (this should be rare because we're going through the
10845       // list backward, but if one of the operands has several users in
10846       // this cluster of to-be-promoted nodes, it is possible).
10847       PromOpHandles.emplace_front(PromOp);
10848       continue;
10849     }
10850 
10851     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10852                                 PromOp.getNode()->op_end());
10853 
10854     // If there are any constant inputs, make sure they're replaced now.
10855     for (unsigned i = 0; i < 2; ++i)
10856       if (isa<ConstantSDNode>(Ops[C+i]))
10857         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
10858 
10859     DAG.ReplaceAllUsesOfValueWith(PromOp,
10860       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
10861   }
10862 
10863   // Now we're left with the initial truncation itself.
10864   if (N->getOpcode() == ISD::TRUNCATE)
10865     return N->getOperand(0);
10866 
10867   // Otherwise, this is a comparison. The operands to be compared have just
10868   // changed type (to i1), but everything else is the same.
10869   return SDValue(N, 0);
10870 }
10871 
/// \brief Combine an extension (zext/sext/aext) of an i1 (CR-bit) or
/// 32-bit value that is fed by a self-contained cluster of binary logic
/// operations (and/or/xor/select/select_cc) over truncations, so that the
/// logic is performed directly in the extended type and the intermediate
/// truncations and the final extension can be removed.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  // The source must be an i1 when CR-bit tracking is on, or an i32 on PPC64.
  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  // Inputs collects the truncation/constant leaves of the cluster; BinOps is
  // the worklist of operations still to visit; PromOps records every
  // operation that will be promoted to the wider type.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  // Index 0 holds select/select_cc condition operand 0, index 1 holds
  // select_cc condition operand 1; the mapped EVT is the pre-promotion type.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Wrap each to-be-promoted operation in a HandleSDNode so the SDValues
  // stay valid across the ReplaceAllUsesOfValueWith calls below.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first value operand to be promoted: SELECT has
    // one leading condition operand and SELECT_CC has two, which keep their
    // original type.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  // Sign extension is implemented as shift-left/shift-right-algebraic by the
  // number of high bits that need replication.
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}
11154 
11155 /// \brief Reduces the number of fp-to-int conversion when building a vector.
11156 ///
11157 /// If this vector is built out of floating to integer conversions,
11158 /// transform it to a vector built out of floating point values followed by a
11159 /// single floating to integer conversion of the vector.
11160 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
11161 /// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCSISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    // FCTIW[U]Z produce 32-bit results; FCTID[U]Z produce 64-bit results.
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    // Every operand must be the same kind of conversion moved out of a VSR;
    // also note whether all operands are the identical node (a splat).
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(i).getOperand(0);
      // For 32-bit values, we need to add an FP_ROUND node.
      if (Is32Bit) {
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
          // The trailing '1' operand flags this FP_ROUND as not changing the
          // value (the result is known to round-trip exactly).
          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
                                      MVT::f32, In.getOperand(0),
                                      DAG.getIntPtrConstant(1, dl));
          Ops.push_back(Trunc);
        }
      } else
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
    }

    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    // Build one vector of the fp source values and perform a single
    // vector fp-to-int conversion instead of one per element.
    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
    return DAG.getNode(Opcode, dl, TargetVT, BV);
  }
  return SDValue();
}
11234 
11235 /// \brief Reduce the number of loads when building a vector.
11236 ///
11237 /// Building a vector out of multiple loads can be converted to a load
11238 /// of the vector type if the loads are consecutive. If the loads are
11239 /// consecutive but in descending order, a shuffle is added at the end
11240 /// to reorder the vector.
11241 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
11242   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11243          "Should be called with a BUILD_VECTOR node");
11244 
11245   SDLoc dl(N);
11246   bool InputsAreConsecutiveLoads = true;
11247   bool InputsAreReverseConsecutive = true;
11248   unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
11249   SDValue FirstInput = N->getOperand(0);
11250   bool IsRoundOfExtLoad = false;
11251 
11252   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
11253       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
11254     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
11255     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
11256   }
11257   // Not a build vector of (possibly fp_rounded) loads.
11258   if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
11259     return SDValue();
11260 
11261   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
11262     // If any inputs are fp_round(extload), they all must be.
11263     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
11264       return SDValue();
11265 
11266     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
11267       N->getOperand(i);
11268     if (NextInput.getOpcode() != ISD::LOAD)
11269       return SDValue();
11270 
11271     SDValue PreviousInput =
11272       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
11273     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
11274     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
11275 
11276     // If any inputs are fp_round(extload), they all must be.
11277     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
11278       return SDValue();
11279 
11280     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
11281       InputsAreConsecutiveLoads = false;
11282     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
11283       InputsAreReverseConsecutive = false;
11284 
11285     // Exit early if the loads are neither consecutive nor reverse consecutive.
11286     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
11287       return SDValue();
11288   }
11289 
11290   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
11291          "The loads cannot be both consecutive and reverse consecutive.");
11292 
11293   SDValue FirstLoadOp =
11294     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
11295   SDValue LastLoadOp =
11296     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
11297                        N->getOperand(N->getNumOperands()-1);
11298 
11299   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
11300   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
11301   if (InputsAreConsecutiveLoads) {
11302     assert(LD1 && "Input needs to be a LoadSDNode.");
11303     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
11304                        LD1->getBasePtr(), LD1->getPointerInfo(),
11305                        LD1->getAlignment());
11306   }
11307   if (InputsAreReverseConsecutive) {
11308     assert(LDL && "Input needs to be a LoadSDNode.");
11309     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
11310                                LDL->getBasePtr(), LDL->getPointerInfo(),
11311                                LDL->getAlignment());
11312     SmallVector<int, 16> Ops;
11313     for (int i = N->getNumOperands() - 1; i >= 0; i--)
11314       Ops.push_back(i);
11315 
11316     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
11317                                 DAG.getUNDEF(N->getValueType(0)), Ops);
11318   }
11319   return SDValue();
11320 }
11321 
11322 // This function adds the required vector_shuffle needed to get
11323 // the elements of the vector extract in the correct position
11324 // as specified by the CorrectElems encoding.
11325 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
11326                                       SDValue Input, uint64_t Elems,
11327                                       uint64_t CorrectElems) {
11328   SDLoc dl(N);
11329 
11330   unsigned NumElems = Input.getValueType().getVectorNumElements();
11331   SmallVector<int, 16> ShuffleMask(NumElems, -1);
11332 
11333   // Knowing the element indices being extracted from the original
11334   // vector and the order in which they're being inserted, just put
11335   // them at element indices required for the instruction.
11336   for (unsigned i = 0; i < N->getNumOperands(); i++) {
11337     if (DAG.getDataLayout().isLittleEndian())
11338       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
11339     else
11340       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
11341     CorrectElems = CorrectElems >> 8;
11342     Elems = Elems >> 8;
11343   }
11344 
11345   SDValue Shuffle =
11346       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
11347                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
11348 
11349   EVT Ty = N->getValueType(0);
11350   SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
11351   return BV;
11352 }
11353 
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create a new
// PPCISD:SExtVElems node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices,
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC  byte->word
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8  byte->double word
  // For LE: the allowed indices are: 0x0,0x8
  // For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  // Accumulates one byte per BUILD_VECTOR operand (low nibble for LE, high
  // nibble for BE), using the same encoding as TargetElems above.
  uint64_t Elems = 0;
  int Index;
  SDValue Input; // The single source vector all extracts must come from.

  // Returns true iff Op is (sign_extend (extract_vector_elt V, C)) with a
  // constant index C, where V matches the Input seen so far. On success it
  // records Input (first match) and appends the index to the Elems encoding.
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND)
      return false;

    SDValue Extract = Op.getOperand(0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    // The extract index must be a compile-time constant.
    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    // Every operand must extract from the same source vector.
    if (Input && Input != Extract.getOperand(0))
      return false;

    if (!Input)
      Input = Extract.getOperand(0);

    // Append this operand's index as the next byte of the encoding; a BE
    // index lives in the high nibble of its byte (see TargetElems).
    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
  // Pick the TargetElems row from the (input, output) element widths; each
  // supported widening combination has a unique width sum.
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0; // i8 -> i32
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1; // i8 -> i64
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2; // i16 -> i32
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3; // i16 -> i64
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4; // i32 -> i64
  else
    return SDValue();

  // Keep only the nibbles relevant for the current endianness before
  // comparing against what this build vector actually extracts.
  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}
11447 
11448 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
11449                                                  DAGCombinerInfo &DCI) const {
11450   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11451          "Should be called with a BUILD_VECTOR node");
11452 
11453   SelectionDAG &DAG = DCI.DAG;
11454   SDLoc dl(N);
11455 
11456   if (!Subtarget.hasVSX())
11457     return SDValue();
11458 
11459   // The target independent DAG combiner will leave a build_vector of
11460   // float-to-int conversions intact. We can generate MUCH better code for
11461   // a float-to-int conversion of a vector of floats.
11462   SDValue FirstInput = N->getOperand(0);
11463   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
11464     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
11465     if (Reduced)
11466       return Reduced;
11467   }
11468 
11469   // If we're building a vector out of consecutive loads, just load that
11470   // vector type.
11471   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
11472   if (Reduced)
11473     return Reduced;
11474 
11475   // If we're building a vector out of extended elements from another vector
11476   // we have P9 vector integer extend instructions.
11477   if (Subtarget.hasP9Altivec()) {
11478     Reduced = combineBVOfVecSExt(N, DAG);
11479     if (Reduced)
11480       return Reduced;
11481   }
11482 
11483 
11484   if (N->getValueType(0) != MVT::v2f64)
11485     return SDValue();
11486 
11487   // Looking for:
11488   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
11489   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
11490       FirstInput.getOpcode() != ISD::UINT_TO_FP)
11491     return SDValue();
11492   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
11493       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
11494     return SDValue();
11495   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
11496     return SDValue();
11497 
11498   SDValue Ext1 = FirstInput.getOperand(0);
11499   SDValue Ext2 = N->getOperand(1).getOperand(0);
11500   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11501      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11502     return SDValue();
11503 
11504   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
11505   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
11506   if (!Ext1Op || !Ext2Op)
11507     return SDValue();
11508   if (Ext1.getValueType() != MVT::i32 ||
11509       Ext2.getValueType() != MVT::i32)
11510   if (Ext1.getOperand(0) != Ext2.getOperand(0))
11511     return SDValue();
11512 
11513   int FirstElem = Ext1Op->getZExtValue();
11514   int SecondElem = Ext2Op->getZExtValue();
11515   int SubvecIdx;
11516   if (FirstElem == 0 && SecondElem == 1)
11517     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
11518   else if (FirstElem == 2 && SecondElem == 3)
11519     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
11520   else
11521     return SDValue();
11522 
11523   SDValue SrcVec = Ext1.getOperand(0);
11524   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
11525     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
11526   return DAG.getNode(NodeType, dl, MVT::v2f64,
11527                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
11528 }
11529 
// Combine (s|u)int_to_fp: on P9, fold a sub-word (i8/i16) load directly
// into the VSR with LXSIZX; otherwise elide the store/load round trip for
// fp -> int -> fp sequences by converting entirely in registers.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  // These combines rely on 64-bit FP/integer conversion support.
  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // On P9, a sub-word (i8/i16) load feeding the conversion can be loaded
  // straight into a VSR with LXSIZX instead of a GPR load + direct move.
  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    // Pick the convert node matching signedness and target precision.
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // Width of the loaded value in bytes: 1 for i8, 2 for i16.
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }

  // Don't handle ppc_fp128 here or i1 conversions.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return SDValue();

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  // FCFIDS/FCFIDUS produce f32 directly; otherwise convert in f64.
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    // The truncating conversion is done in f64; widen an f32 source first.
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    // Truncate-to-integer node matching the signedness of the round trip.
    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT the conversion was done in f64; round to f32 here.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
11630 
11631 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
11632 // builtins) into loads with swaps.
11633 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
11634                                               DAGCombinerInfo &DCI) const {
11635   SelectionDAG &DAG = DCI.DAG;
11636   SDLoc dl(N);
11637   SDValue Chain;
11638   SDValue Base;
11639   MachineMemOperand *MMO;
11640 
11641   switch (N->getOpcode()) {
11642   default:
11643     llvm_unreachable("Unexpected opcode for little endian VSX load");
11644   case ISD::LOAD: {
11645     LoadSDNode *LD = cast<LoadSDNode>(N);
11646     Chain = LD->getChain();
11647     Base = LD->getBasePtr();
11648     MMO = LD->getMemOperand();
11649     // If the MMO suggests this isn't a load of a full vector, leave
11650     // things alone.  For a built-in, we have to make the change for
11651     // correctness, so if there is a size problem that will be a bug.
11652     if (MMO->getSize() < 16)
11653       return SDValue();
11654     break;
11655   }
11656   case ISD::INTRINSIC_W_CHAIN: {
11657     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
11658     Chain = Intrin->getChain();
11659     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
11660     // us what we want. Get operand 2 instead.
11661     Base = Intrin->getOperand(2);
11662     MMO = Intrin->getMemOperand();
11663     break;
11664   }
11665   }
11666 
11667   MVT VecTy = N->getValueType(0).getSimpleVT();
11668 
11669   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
11670   // aligned and the type is a vector with elements up to 4 bytes
11671   if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
11672       && VecTy.getScalarSizeInBits() <= 32 ) {
11673     return SDValue();
11674   }
11675 
11676   SDValue LoadOps[] = { Chain, Base };
11677   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
11678                                          DAG.getVTList(MVT::v2f64, MVT::Other),
11679                                          LoadOps, MVT::v2f64, MMO);
11680 
11681   DCI.AddToWorklist(Load.getNode());
11682   Chain = Load.getValue(1);
11683   SDValue Swap = DAG.getNode(
11684       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
11685   DCI.AddToWorklist(Swap.getNode());
11686 
11687   // Add a bitcast if the resulting load type doesn't match v2f64.
11688   if (VecTy != MVT::v2f64) {
11689     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
11690     DCI.AddToWorklist(N.getNode());
11691     // Package {bitcast value, swap's chain} to match Load's shape.
11692     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
11693                        N, Swap.getValue(1));
11694   }
11695 
11696   return Swap;
11697 }
11698 
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps (PPCISD::XXSWAPD + PPCISD::STXVD2X).
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd; // Operand index of the value being stored.
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
  // aligned and the type is a vector with elements up to 4 bytes
  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
      && VecTy.getScalarSizeInBits() <= 32 ) {
    return SDValue();
  }

  // All stores are done as v2f64 and possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
    DCI.AddToWorklist(Src.getNode());
  }

  // Swap the doublewords before the store; STXVD2X is a non-permuting
  // store, so the swap undoes the little-endian element reordering.
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}
11764 
11765 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
11766                                              DAGCombinerInfo &DCI) const {
11767   SelectionDAG &DAG = DCI.DAG;
11768   SDLoc dl(N);
11769   switch (N->getOpcode()) {
11770   default: break;
11771   case ISD::SHL:
11772     return combineSHL(N, DCI);
11773   case ISD::SRA:
11774     return combineSRA(N, DCI);
11775   case ISD::SRL:
11776     return combineSRL(N, DCI);
11777   case PPCISD::SHL:
11778     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
11779         return N->getOperand(0);
11780     break;
11781   case PPCISD::SRL:
11782     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
11783         return N->getOperand(0);
11784     break;
11785   case PPCISD::SRA:
11786     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
11787       if (C->isNullValue() ||   //  0 >>s V -> 0.
11788           C->isAllOnesValue())    // -1 >>s V -> -1.
11789         return N->getOperand(0);
11790     }
11791     break;
11792   case ISD::SIGN_EXTEND:
11793   case ISD::ZERO_EXTEND:
11794   case ISD::ANY_EXTEND:
11795     return DAGCombineExtBoolTrunc(N, DCI);
11796   case ISD::TRUNCATE:
11797   case ISD::SETCC:
11798   case ISD::SELECT_CC:
11799     return DAGCombineTruncBoolExt(N, DCI);
11800   case ISD::SINT_TO_FP:
11801   case ISD::UINT_TO_FP:
11802     return combineFPToIntToFP(N, DCI);
11803   case ISD::STORE: {
11804     EVT Op1VT = N->getOperand(1).getValueType();
11805     bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
11806       (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
11807 
11808     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
11809     if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
11810         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
11811         ValidTypeForStoreFltAsInt &&
11812         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
11813       SDValue Val = N->getOperand(1).getOperand(0);
11814       if (Val.getValueType() == MVT::f32) {
11815         Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
11816         DCI.AddToWorklist(Val.getNode());
11817       }
11818       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
11819       DCI.AddToWorklist(Val.getNode());
11820 
11821       if (Op1VT == MVT::i32) {
11822         SDValue Ops[] = {
11823           N->getOperand(0), Val, N->getOperand(2),
11824           DAG.getValueType(N->getOperand(1).getValueType())
11825         };
11826 
11827         Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
11828                 DAG.getVTList(MVT::Other), Ops,
11829                 cast<StoreSDNode>(N)->getMemoryVT(),
11830                 cast<StoreSDNode>(N)->getMemOperand());
11831       } else {
11832         unsigned WidthInBytes =
11833           N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
11834         SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
11835 
11836         SDValue Ops[] = {
11837           N->getOperand(0), Val, N->getOperand(2), WidthConst,
11838           DAG.getValueType(N->getOperand(1).getValueType())
11839         };
11840         Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
11841                                       DAG.getVTList(MVT::Other), Ops,
11842                                       cast<StoreSDNode>(N)->getMemoryVT(),
11843                                       cast<StoreSDNode>(N)->getMemOperand());
11844       }
11845 
11846       DCI.AddToWorklist(Val.getNode());
11847       return Val;
11848     }
11849 
11850     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
11851     if (cast<StoreSDNode>(N)->isUnindexed() &&
11852         N->getOperand(1).getOpcode() == ISD::BSWAP &&
11853         N->getOperand(1).getNode()->hasOneUse() &&
11854         (N->getOperand(1).getValueType() == MVT::i32 ||
11855          N->getOperand(1).getValueType() == MVT::i16 ||
11856          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
11857           N->getOperand(1).getValueType() == MVT::i64))) {
11858       SDValue BSwapOp = N->getOperand(1).getOperand(0);
11859       // Do an any-extend to 32-bits if this is a half-word input.
11860       if (BSwapOp.getValueType() == MVT::i16)
11861         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
11862 
11863       // If the type of BSWAP operand is wider than stored memory width
11864       // it need to be shifted to the right side before STBRX.
11865       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
11866       if (Op1VT.bitsGT(mVT)) {
11867         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
11868         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
11869                               DAG.getConstant(Shift, dl, MVT::i32));
11870         // Need to truncate if this is a bswap of i64 stored as i32/i16.
11871         if (Op1VT == MVT::i64)
11872           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
11873       }
11874 
11875       SDValue Ops[] = {
11876         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
11877       };
11878       return
11879         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
11880                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
11881                                 cast<StoreSDNode>(N)->getMemOperand());
11882     }
11883 
11884     // For little endian, VSX stores require generating xxswapd/lxvd2x.
11885     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
11886     EVT VT = N->getOperand(1).getValueType();
11887     if (VT.isSimple()) {
11888       MVT StoreVT = VT.getSimpleVT();
11889       if (Subtarget.needsSwapsForVSXMemOps() &&
11890           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
11891            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
11892         return expandVSXStoreForLE(N, DCI);
11893     }
11894     break;
11895   }
11896   case ISD::LOAD: {
11897     LoadSDNode *LD = cast<LoadSDNode>(N);
11898     EVT VT = LD->getValueType(0);
11899 
11900     // For little endian, VSX loads require generating lxvd2x/xxswapd.
11901     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
11902     if (VT.isSimple()) {
11903       MVT LoadVT = VT.getSimpleVT();
11904       if (Subtarget.needsSwapsForVSXMemOps() &&
11905           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
11906            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
11907         return expandVSXLoadForLE(N, DCI);
11908     }
11909 
11910     // We sometimes end up with a 64-bit integer load, from which we extract
11911     // two single-precision floating-point numbers. This happens with
11912     // std::complex<float>, and other similar structures, because of the way we
11913     // canonicalize structure copies. However, if we lack direct moves,
11914     // then the final bitcasts from the extracted integer values to the
11915     // floating-point numbers turn into store/load pairs. Even with direct moves,
11916     // just loading the two floating-point numbers is likely better.
11917     auto ReplaceTwoFloatLoad = [&]() {
11918       if (VT != MVT::i64)
11919         return false;
11920 
11921       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
11922           LD->isVolatile())
11923         return false;
11924 
11925       //  We're looking for a sequence like this:
11926       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
11927       //      t16: i64 = srl t13, Constant:i32<32>
11928       //    t17: i32 = truncate t16
11929       //  t18: f32 = bitcast t17
11930       //    t19: i32 = truncate t13
11931       //  t20: f32 = bitcast t19
11932 
11933       if (!LD->hasNUsesOfValue(2, 0))
11934         return false;
11935 
11936       auto UI = LD->use_begin();
11937       while (UI.getUse().getResNo() != 0) ++UI;
11938       SDNode *Trunc = *UI++;
11939       while (UI.getUse().getResNo() != 0) ++UI;
11940       SDNode *RightShift = *UI;
11941       if (Trunc->getOpcode() != ISD::TRUNCATE)
11942         std::swap(Trunc, RightShift);
11943 
11944       if (Trunc->getOpcode() != ISD::TRUNCATE ||
11945           Trunc->getValueType(0) != MVT::i32 ||
11946           !Trunc->hasOneUse())
11947         return false;
11948       if (RightShift->getOpcode() != ISD::SRL ||
11949           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
11950           RightShift->getConstantOperandVal(1) != 32 ||
11951           !RightShift->hasOneUse())
11952         return false;
11953 
11954       SDNode *Trunc2 = *RightShift->use_begin();
11955       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
11956           Trunc2->getValueType(0) != MVT::i32 ||
11957           !Trunc2->hasOneUse())
11958         return false;
11959 
11960       SDNode *Bitcast = *Trunc->use_begin();
11961       SDNode *Bitcast2 = *Trunc2->use_begin();
11962 
11963       if (Bitcast->getOpcode() != ISD::BITCAST ||
11964           Bitcast->getValueType(0) != MVT::f32)
11965         return false;
11966       if (Bitcast2->getOpcode() != ISD::BITCAST ||
11967           Bitcast2->getValueType(0) != MVT::f32)
11968         return false;
11969 
11970       if (Subtarget.isLittleEndian())
11971         std::swap(Bitcast, Bitcast2);
11972 
11973       // Bitcast has the second float (in memory-layout order) and Bitcast2
11974       // has the first one.
11975 
11976       SDValue BasePtr = LD->getBasePtr();
11977       if (LD->isIndexed()) {
11978         assert(LD->getAddressingMode() == ISD::PRE_INC &&
11979                "Non-pre-inc AM on PPC?");
11980         BasePtr =
11981           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11982                       LD->getOffset());
11983       }
11984 
11985       auto MMOFlags =
11986           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
11987       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
11988                                       LD->getPointerInfo(), LD->getAlignment(),
11989                                       MMOFlags, LD->getAAInfo());
11990       SDValue AddPtr =
11991         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
11992                     BasePtr, DAG.getIntPtrConstant(4, dl));
11993       SDValue FloatLoad2 = DAG.getLoad(
11994           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
11995           LD->getPointerInfo().getWithOffset(4),
11996           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
11997 
11998       if (LD->isIndexed()) {
11999         // Note that DAGCombine should re-form any pre-increment load(s) from
12000         // what is produced here if that makes sense.
12001         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
12002       }
12003 
12004       DCI.CombineTo(Bitcast2, FloatLoad);
12005       DCI.CombineTo(Bitcast, FloatLoad2);
12006 
12007       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
12008                                     SDValue(FloatLoad2.getNode(), 1));
12009       return true;
12010     };
12011 
12012     if (ReplaceTwoFloatLoad())
12013       return SDValue(N, 0);
12014 
12015     EVT MemVT = LD->getMemoryVT();
12016     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
12017     unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
12018     Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
12019     unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
12020     if (LD->isUnindexed() && VT.isVector() &&
12021         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
12022           // P8 and later hardware should just use LOAD.
12023           !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
12024                                        VT == MVT::v4i32 || VT == MVT::v4f32)) ||
12025          (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
12026           LD->getAlignment() >= ScalarABIAlignment)) &&
12027         LD->getAlignment() < ABIAlignment) {
12028       // This is a type-legal unaligned Altivec or QPX load.
12029       SDValue Chain = LD->getChain();
12030       SDValue Ptr = LD->getBasePtr();
12031       bool isLittleEndian = Subtarget.isLittleEndian();
12032 
12033       // This implements the loading of unaligned vectors as described in
12034       // the venerable Apple Velocity Engine overview. Specifically:
12035       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
12036       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
12037       //
12038       // The general idea is to expand a sequence of one or more unaligned
12039       // loads into an alignment-based permutation-control instruction (lvsl
12040       // or lvsr), a series of regular vector loads (which always truncate
12041       // their input address to an aligned address), and a series of
12042       // permutations.  The results of these permutations are the requested
12043       // loaded values.  The trick is that the last "extra" load is not taken
12044       // from the address you might suspect (sizeof(vector) bytes after the
12045       // last requested load), but rather sizeof(vector) - 1 bytes after the
12046       // last requested vector. The point of this is to avoid a page fault if
12047       // the base address happened to be aligned. This works because if the
12048       // base address is aligned, then adding less than a full vector length
12049       // will cause the last vector in the sequence to be (re)loaded.
12050       // Otherwise, the next vector will be fetched as you might suspect was
12051       // necessary.
12052 
12053       // We might be able to reuse the permutation generation from
12054       // a different base address offset from this one by an aligned amount.
12055       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
12056       // optimization later.
12057       Intrinsic::ID Intr, IntrLD, IntrPerm;
12058       MVT PermCntlTy, PermTy, LDTy;
12059       if (Subtarget.hasAltivec()) {
12060         Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
12061                                  Intrinsic::ppc_altivec_lvsl;
12062         IntrLD = Intrinsic::ppc_altivec_lvx;
12063         IntrPerm = Intrinsic::ppc_altivec_vperm;
12064         PermCntlTy = MVT::v16i8;
12065         PermTy = MVT::v4i32;
12066         LDTy = MVT::v4i32;
12067       } else {
12068         Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
12069                                        Intrinsic::ppc_qpx_qvlpcls;
12070         IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
12071                                        Intrinsic::ppc_qpx_qvlfs;
12072         IntrPerm = Intrinsic::ppc_qpx_qvfperm;
12073         PermCntlTy = MVT::v4f64;
12074         PermTy = MVT::v4f64;
12075         LDTy = MemVT.getSimpleVT();
12076       }
12077 
12078       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
12079 
12080       // Create the new MMO for the new base load. It is like the original MMO,
12081       // but represents an area in memory almost twice the vector size centered
12082       // on the original address. If the address is unaligned, we might start
12083       // reading up to (sizeof(vector)-1) bytes below the address of the
12084       // original unaligned load.
12085       MachineFunction &MF = DAG.getMachineFunction();
12086       MachineMemOperand *BaseMMO =
12087         MF.getMachineMemOperand(LD->getMemOperand(),
12088                                 -(long)MemVT.getStoreSize()+1,
12089                                 2*MemVT.getStoreSize()-1);
12090 
12091       // Create the new base load.
12092       SDValue LDXIntID =
12093           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
12094       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
12095       SDValue BaseLoad =
12096         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12097                                 DAG.getVTList(PermTy, MVT::Other),
12098                                 BaseLoadOps, LDTy, BaseMMO);
12099 
12100       // Note that the value of IncOffset (which is provided to the next
12101       // load's pointer info offset value, and thus used to calculate the
12102       // alignment), and the value of IncValue (which is actually used to
12103       // increment the pointer value) are different! This is because we
12104       // require the next load to appear to be aligned, even though it
12105       // is actually offset from the base pointer by a lesser amount.
12106       int IncOffset = VT.getSizeInBits() / 8;
12107       int IncValue = IncOffset;
12108 
12109       // Walk (both up and down) the chain looking for another load at the real
12110       // (aligned) offset (the alignment of the other load does not matter in
12111       // this case). If found, then do not use the offset reduction trick, as
12112       // that will prevent the loads from being later combined (as they would
12113       // otherwise be duplicates).
12114       if (!findConsecutiveLoad(LD, DAG))
12115         --IncValue;
12116 
12117       SDValue Increment =
12118           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
12119       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
12120 
12121       MachineMemOperand *ExtraMMO =
12122         MF.getMachineMemOperand(LD->getMemOperand(),
12123                                 1, 2*MemVT.getStoreSize()-1);
12124       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
12125       SDValue ExtraLoad =
12126         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12127                                 DAG.getVTList(PermTy, MVT::Other),
12128                                 ExtraLoadOps, LDTy, ExtraMMO);
12129 
12130       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
12131         BaseLoad.getValue(1), ExtraLoad.getValue(1));
12132 
12133       // Because vperm has a big-endian bias, we must reverse the order
12134       // of the input vectors and complement the permute control vector
12135       // when generating little endian code.  We have already handled the
12136       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
12137       // and ExtraLoad here.
12138       SDValue Perm;
12139       if (isLittleEndian)
12140         Perm = BuildIntrinsicOp(IntrPerm,
12141                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
12142       else
12143         Perm = BuildIntrinsicOp(IntrPerm,
12144                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
12145 
12146       if (VT != PermTy)
12147         Perm = Subtarget.hasAltivec() ?
12148                  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
12149                  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
12150                                DAG.getTargetConstant(1, dl, MVT::i64));
12151                                // second argument is 1 because this rounding
12152                                // is always exact.
12153 
12154       // The output of the permutation is our loaded result, the TokenFactor is
12155       // our new chain.
12156       DCI.CombineTo(N, Perm, TF);
12157       return SDValue(N, 0);
12158     }
12159     }
12160     break;
12161     case ISD::INTRINSIC_WO_CHAIN: {
12162       bool isLittleEndian = Subtarget.isLittleEndian();
12163       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
12164       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
12165                                            : Intrinsic::ppc_altivec_lvsl);
12166       if ((IID == Intr ||
12167            IID == Intrinsic::ppc_qpx_qvlpcld  ||
12168            IID == Intrinsic::ppc_qpx_qvlpcls) &&
12169         N->getOperand(1)->getOpcode() == ISD::ADD) {
12170         SDValue Add = N->getOperand(1);
12171 
12172         int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
12173                    5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
12174 
12175         if (DAG.MaskedValueIsZero(Add->getOperand(1),
12176                                   APInt::getAllOnesValue(Bits /* alignment */)
12177                                       .zext(Add.getScalarValueSizeInBits()))) {
12178           SDNode *BasePtr = Add->getOperand(0).getNode();
12179           for (SDNode::use_iterator UI = BasePtr->use_begin(),
12180                                     UE = BasePtr->use_end();
12181                UI != UE; ++UI) {
12182             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12183                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
12184               // We've found another LVSL/LVSR, and this address is an aligned
12185               // multiple of that one. The results will be the same, so use the
12186               // one we've just found instead.
12187 
12188               return SDValue(*UI, 0);
12189             }
12190           }
12191         }
12192 
12193         if (isa<ConstantSDNode>(Add->getOperand(1))) {
12194           SDNode *BasePtr = Add->getOperand(0).getNode();
12195           for (SDNode::use_iterator UI = BasePtr->use_begin(),
12196                UE = BasePtr->use_end(); UI != UE; ++UI) {
12197             if (UI->getOpcode() == ISD::ADD &&
12198                 isa<ConstantSDNode>(UI->getOperand(1)) &&
12199                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
12200                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
12201                 (1ULL << Bits) == 0) {
12202               SDNode *OtherAdd = *UI;
12203               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
12204                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
12205                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12206                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
12207                   return SDValue(*VI, 0);
12208                 }
12209               }
12210             }
12211           }
12212         }
12213       }
12214     }
12215 
12216     break;
12217   case ISD::INTRINSIC_W_CHAIN:
12218     // For little endian, VSX loads require generating lxvd2x/xxswapd.
12219     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
12220     if (Subtarget.needsSwapsForVSXMemOps()) {
12221       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12222       default:
12223         break;
12224       case Intrinsic::ppc_vsx_lxvw4x:
12225       case Intrinsic::ppc_vsx_lxvd2x:
12226         return expandVSXLoadForLE(N, DCI);
12227       }
12228     }
12229     break;
12230   case ISD::INTRINSIC_VOID:
12231     // For little endian, VSX stores require generating xxswapd/stxvd2x.
12232     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
12233     if (Subtarget.needsSwapsForVSXMemOps()) {
12234       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12235       default:
12236         break;
12237       case Intrinsic::ppc_vsx_stxvw4x:
12238       case Intrinsic::ppc_vsx_stxvd2x:
12239         return expandVSXStoreForLE(N, DCI);
12240       }
12241     }
12242     break;
12243   case ISD::BSWAP:
12244     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
12245     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
12246         N->getOperand(0).hasOneUse() &&
12247         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
12248          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
12249           N->getValueType(0) == MVT::i64))) {
12250       SDValue Load = N->getOperand(0);
12251       LoadSDNode *LD = cast<LoadSDNode>(Load);
12252       // Create the byte-swapping load.
12253       SDValue Ops[] = {
12254         LD->getChain(),    // Chain
12255         LD->getBasePtr(),  // Ptr
12256         DAG.getValueType(N->getValueType(0)) // VT
12257       };
12258       SDValue BSLoad =
12259         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
12260                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
12261                                               MVT::i64 : MVT::i32, MVT::Other),
12262                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
12263 
12264       // If this is an i16 load, insert the truncate.
12265       SDValue ResVal = BSLoad;
12266       if (N->getValueType(0) == MVT::i16)
12267         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
12268 
12269       // First, combine the bswap away.  This makes the value produced by the
12270       // load dead.
12271       DCI.CombineTo(N, ResVal);
12272 
12273       // Next, combine the load away, we give it a bogus result value but a real
12274       // chain result.  The result value is dead because the bswap is dead.
12275       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
12276 
12277       // Return N so it doesn't get rechecked!
12278       return SDValue(N, 0);
12279     }
12280     break;
12281   case PPCISD::VCMP:
12282     // If a VCMPo node already exists with exactly the same operands as this
12283     // node, use its result instead of this node (VCMPo computes both a CR6 and
12284     // a normal output).
12285     //
12286     if (!N->getOperand(0).hasOneUse() &&
12287         !N->getOperand(1).hasOneUse() &&
12288         !N->getOperand(2).hasOneUse()) {
12289 
12290       // Scan all of the users of the LHS, looking for VCMPo's that match.
12291       SDNode *VCMPoNode = nullptr;
12292 
12293       SDNode *LHSN = N->getOperand(0).getNode();
12294       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
12295            UI != E; ++UI)
12296         if (UI->getOpcode() == PPCISD::VCMPo &&
12297             UI->getOperand(1) == N->getOperand(1) &&
12298             UI->getOperand(2) == N->getOperand(2) &&
12299             UI->getOperand(0) == N->getOperand(0)) {
12300           VCMPoNode = *UI;
12301           break;
12302         }
12303 
12304       // If there is no VCMPo node, or if the flag value has a single use, don't
12305       // transform this.
12306       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
12307         break;
12308 
12309       // Look at the (necessarily single) use of the flag value.  If it has a
12310       // chain, this transformation is more complex.  Note that multiple things
12311       // could use the value result, which we should ignore.
12312       SDNode *FlagUser = nullptr;
12313       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
12314            FlagUser == nullptr; ++UI) {
12315         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
12316         SDNode *User = *UI;
12317         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
12318           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
12319             FlagUser = User;
12320             break;
12321           }
12322         }
12323       }
12324 
12325       // If the user is a MFOCRF instruction, we know this is safe.
12326       // Otherwise we give up for right now.
12327       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
12328         return SDValue(VCMPoNode, 0);
12329     }
12330     break;
12331   case ISD::BRCOND: {
12332     SDValue Cond = N->getOperand(1);
12333     SDValue Target = N->getOperand(2);
12334 
12335     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12336         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
12337           Intrinsic::ppc_is_decremented_ctr_nonzero) {
12338 
12339       // We now need to make the intrinsic dead (it cannot be instruction
12340       // selected).
12341       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
12342       assert(Cond.getNode()->hasOneUse() &&
12343              "Counter decrement has more than one use");
12344 
12345       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
12346                          N->getOperand(0), Target);
12347     }
12348   }
12349   break;
12350   case ISD::BR_CC: {
12351     // If this is a branch on an altivec predicate comparison, lower this so
12352     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
12353     // lowering is done pre-legalize, because the legalizer lowers the predicate
12354     // compare down to code that is difficult to reassemble.
12355     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
12356     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
12357 
12358     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
12359     // value. If so, pass-through the AND to get to the intrinsic.
12360     if (LHS.getOpcode() == ISD::AND &&
12361         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12362         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
12363           Intrinsic::ppc_is_decremented_ctr_nonzero &&
12364         isa<ConstantSDNode>(LHS.getOperand(1)) &&
12365         !isNullConstant(LHS.getOperand(1)))
12366       LHS = LHS.getOperand(0);
12367 
12368     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12369         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
12370           Intrinsic::ppc_is_decremented_ctr_nonzero &&
12371         isa<ConstantSDNode>(RHS)) {
12372       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
12373              "Counter decrement comparison is not EQ or NE");
12374 
12375       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
12376       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
12377                     (CC == ISD::SETNE && !Val);
12378 
12379       // We now need to make the intrinsic dead (it cannot be instruction
12380       // selected).
12381       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
12382       assert(LHS.getNode()->hasOneUse() &&
12383              "Counter decrement has more than one use");
12384 
12385       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
12386                          N->getOperand(0), N->getOperand(4));
12387     }
12388 
12389     int CompareOpc;
12390     bool isDot;
12391 
12392     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12393         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
12394         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
12395       assert(isDot && "Can't compare against a vector result!");
12396 
12397       // If this is a comparison against something other than 0/1, then we know
12398       // that the condition is never/always true.
12399       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
12400       if (Val != 0 && Val != 1) {
12401         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
12402           return N->getOperand(0);
12403         // Always !=, turn it into an unconditional branch.
12404         return DAG.getNode(ISD::BR, dl, MVT::Other,
12405                            N->getOperand(0), N->getOperand(4));
12406       }
12407 
12408       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
12409 
12410       // Create the PPCISD altivec 'dot' comparison node.
12411       SDValue Ops[] = {
12412         LHS.getOperand(2),  // LHS of compare
12413         LHS.getOperand(3),  // RHS of compare
12414         DAG.getConstant(CompareOpc, dl, MVT::i32)
12415       };
12416       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
12417       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
12418 
12419       // Unpack the result based on how the target uses it.
12420       PPC::Predicate CompOpc;
12421       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
12422       default:  // Can't happen, don't crash on invalid number though.
12423       case 0:   // Branch on the value of the EQ bit of CR6.
12424         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
12425         break;
12426       case 1:   // Branch on the inverted value of the EQ bit of CR6.
12427         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
12428         break;
12429       case 2:   // Branch on the value of the LT bit of CR6.
12430         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
12431         break;
12432       case 3:   // Branch on the inverted value of the LT bit of CR6.
12433         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
12434         break;
12435       }
12436 
12437       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
12438                          DAG.getConstant(CompOpc, dl, MVT::i32),
12439                          DAG.getRegister(PPC::CR6, MVT::i32),
12440                          N->getOperand(4), CompNode.getValue(1));
12441     }
12442     break;
12443   }
12444   case ISD::BUILD_VECTOR:
12445     return DAGCombineBuildVector(N, DCI);
12446   }
12447 
12448   return SDValue();
12449 }
12450 
12451 SDValue
12452 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
12453                                   SelectionDAG &DAG,
12454                                   std::vector<SDNode *> *Created) const {
12455   // fold (sdiv X, pow2)
12456   EVT VT = N->getValueType(0);
12457   if (VT == MVT::i64 && !Subtarget.isPPC64())
12458     return SDValue();
12459   if ((VT != MVT::i32 && VT != MVT::i64) ||
12460       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
12461     return SDValue();
12462 
12463   SDLoc DL(N);
12464   SDValue N0 = N->getOperand(0);
12465 
12466   bool IsNegPow2 = (-Divisor).isPowerOf2();
12467   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
12468   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
12469 
12470   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
12471   if (Created)
12472     Created->push_back(Op.getNode());
12473 
12474   if (IsNegPow2) {
12475     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
12476     if (Created)
12477       Created->push_back(Op.getNode());
12478   }
12479 
12480   return Op;
12481 }
12482 
12483 //===----------------------------------------------------------------------===//
12484 // Inline Assembly Support
12485 //===----------------------------------------------------------------------===//
12486 
12487 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
12488                                                       KnownBits &Known,
12489                                                       const APInt &DemandedElts,
12490                                                       const SelectionDAG &DAG,
12491                                                       unsigned Depth) const {
12492   Known.resetAll();
12493   switch (Op.getOpcode()) {
12494   default: break;
12495   case PPCISD::LBRX: {
12496     // lhbrx is known to have the top bits cleared out.
12497     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
12498       Known.Zero = 0xFFFF0000;
12499     break;
12500   }
12501   case ISD::INTRINSIC_WO_CHAIN: {
12502     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
12503     default: break;
12504     case Intrinsic::ppc_altivec_vcmpbfp_p:
12505     case Intrinsic::ppc_altivec_vcmpeqfp_p:
12506     case Intrinsic::ppc_altivec_vcmpequb_p:
12507     case Intrinsic::ppc_altivec_vcmpequh_p:
12508     case Intrinsic::ppc_altivec_vcmpequw_p:
12509     case Intrinsic::ppc_altivec_vcmpequd_p:
12510     case Intrinsic::ppc_altivec_vcmpgefp_p:
12511     case Intrinsic::ppc_altivec_vcmpgtfp_p:
12512     case Intrinsic::ppc_altivec_vcmpgtsb_p:
12513     case Intrinsic::ppc_altivec_vcmpgtsh_p:
12514     case Intrinsic::ppc_altivec_vcmpgtsw_p:
12515     case Intrinsic::ppc_altivec_vcmpgtsd_p:
12516     case Intrinsic::ppc_altivec_vcmpgtub_p:
12517     case Intrinsic::ppc_altivec_vcmpgtuh_p:
12518     case Intrinsic::ppc_altivec_vcmpgtuw_p:
12519     case Intrinsic::ppc_altivec_vcmpgtud_p:
12520       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
12521       break;
12522     }
12523   }
12524   }
12525 }
12526 
12527 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
12528   switch (Subtarget.getDarwinDirective()) {
12529   default: break;
12530   case PPC::DIR_970:
12531   case PPC::DIR_PWR4:
12532   case PPC::DIR_PWR5:
12533   case PPC::DIR_PWR5X:
12534   case PPC::DIR_PWR6:
12535   case PPC::DIR_PWR6X:
12536   case PPC::DIR_PWR7:
12537   case PPC::DIR_PWR8:
12538   case PPC::DIR_PWR9: {
12539     if (!ML)
12540       break;
12541 
12542     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12543 
12544     // For small loops (between 5 and 8 instructions), align to a 32-byte
12545     // boundary so that the entire loop fits in one instruction-cache line.
12546     uint64_t LoopSize = 0;
12547     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
12548       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
12549         LoopSize += TII->getInstSizeInBytes(*J);
12550         if (LoopSize > 32)
12551           break;
12552       }
12553 
12554     if (LoopSize > 16 && LoopSize <= 32)
12555       return 5;
12556 
12557     break;
12558   }
12559   }
12560 
12561   return TargetLowering::getPrefLoopAlignment(ML);
12562 }
12563 
12564 /// getConstraintType - Given a constraint, return the type of
12565 /// constraint it is for this target.
12566 PPCTargetLowering::ConstraintType
12567 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
12568   if (Constraint.size() == 1) {
12569     switch (Constraint[0]) {
12570     default: break;
12571     case 'b':
12572     case 'r':
12573     case 'f':
12574     case 'd':
12575     case 'v':
12576     case 'y':
12577       return C_RegisterClass;
12578     case 'Z':
12579       // FIXME: While Z does indicate a memory constraint, it specifically
12580       // indicates an r+r address (used in conjunction with the 'y' modifier
12581       // in the replacement string). Currently, we're forcing the base
12582       // register to be r0 in the asm printer (which is interpreted as zero)
12583       // and forming the complete address in the second register. This is
12584       // suboptimal.
12585       return C_Memory;
12586     }
12587   } else if (Constraint == "wc") { // individual CR bits.
12588     return C_RegisterClass;
12589   } else if (Constraint == "wa" || Constraint == "wd" ||
12590              Constraint == "wf" || Constraint == "ws") {
12591     return C_RegisterClass; // VSX registers.
12592   }
12593   return TargetLowering::getConstraintType(Constraint);
12594 }
12595 
12596 /// Examine constraint type and operand type and determine a weight value.
12597 /// This object must already have been set up with the operand type
12598 /// and the current alternative constraint selected.
12599 TargetLowering::ConstraintWeight
12600 PPCTargetLowering::getSingleConstraintMatchWeight(
12601     AsmOperandInfo &info, const char *constraint) const {
12602   ConstraintWeight weight = CW_Invalid;
12603   Value *CallOperandVal = info.CallOperandVal;
12604     // If we don't have a value, we can't do a match,
12605     // but allow it at the lowest weight.
12606   if (!CallOperandVal)
12607     return CW_Default;
12608   Type *type = CallOperandVal->getType();
12609 
12610   // Look at the constraint type.
12611   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
12612     return CW_Register; // an individual CR bit.
12613   else if ((StringRef(constraint) == "wa" ||
12614             StringRef(constraint) == "wd" ||
12615             StringRef(constraint) == "wf") &&
12616            type->isVectorTy())
12617     return CW_Register;
12618   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
12619     return CW_Register;
12620 
12621   switch (*constraint) {
12622   default:
12623     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
12624     break;
12625   case 'b':
12626     if (type->isIntegerTy())
12627       weight = CW_Register;
12628     break;
12629   case 'f':
12630     if (type->isFloatTy())
12631       weight = CW_Register;
12632     break;
12633   case 'd':
12634     if (type->isDoubleTy())
12635       weight = CW_Register;
12636     break;
12637   case 'v':
12638     if (type->isVectorTy())
12639       weight = CW_Register;
12640     break;
12641   case 'y':
12642     weight = CW_Register;
12643     break;
12644   case 'Z':
12645     weight = CW_Memory;
12646     break;
12647   }
12648   return weight;
12649 }
12650 
12651 std::pair<unsigned, const TargetRegisterClass *>
12652 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
12653                                                 StringRef Constraint,
12654                                                 MVT VT) const {
12655   if (Constraint.size() == 1) {
12656     // GCC RS6000 Constraint Letters
12657     switch (Constraint[0]) {
12658     case 'b':   // R1-R31
12659       if (VT == MVT::i64 && Subtarget.isPPC64())
12660         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
12661       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
12662     case 'r':   // R0-R31
12663       if (VT == MVT::i64 && Subtarget.isPPC64())
12664         return std::make_pair(0U, &PPC::G8RCRegClass);
12665       return std::make_pair(0U, &PPC::GPRCRegClass);
12666     // 'd' and 'f' constraints are both defined to be "the floating point
12667     // registers", where one is for 32-bit and the other for 64-bit. We don't
12668     // really care overly much here so just give them all the same reg classes.
12669     case 'd':
12670     case 'f':
12671       if (VT == MVT::f32 || VT == MVT::i32)
12672         return std::make_pair(0U, &PPC::F4RCRegClass);
12673       if (VT == MVT::f64 || VT == MVT::i64)
12674         return std::make_pair(0U, &PPC::F8RCRegClass);
12675       if (VT == MVT::v4f64 && Subtarget.hasQPX())
12676         return std::make_pair(0U, &PPC::QFRCRegClass);
12677       if (VT == MVT::v4f32 && Subtarget.hasQPX())
12678         return std::make_pair(0U, &PPC::QSRCRegClass);
12679       break;
12680     case 'v':
12681       if (VT == MVT::v4f64 && Subtarget.hasQPX())
12682         return std::make_pair(0U, &PPC::QFRCRegClass);
12683       if (VT == MVT::v4f32 && Subtarget.hasQPX())
12684         return std::make_pair(0U, &PPC::QSRCRegClass);
12685       if (Subtarget.hasAltivec())
12686         return std::make_pair(0U, &PPC::VRRCRegClass);
12687     case 'y':   // crrc
12688       return std::make_pair(0U, &PPC::CRRCRegClass);
12689     }
12690   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
12691     // An individual CR bit.
12692     return std::make_pair(0U, &PPC::CRBITRCRegClass);
12693   } else if ((Constraint == "wa" || Constraint == "wd" ||
12694              Constraint == "wf") && Subtarget.hasVSX()) {
12695     return std::make_pair(0U, &PPC::VSRCRegClass);
12696   } else if (Constraint == "ws" && Subtarget.hasVSX()) {
12697     if (VT == MVT::f32 && Subtarget.hasP8Vector())
12698       return std::make_pair(0U, &PPC::VSSRCRegClass);
12699     else
12700       return std::make_pair(0U, &PPC::VSFRCRegClass);
12701   }
12702 
12703   std::pair<unsigned, const TargetRegisterClass *> R =
12704       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
12705 
12706   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
12707   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
12708   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
12709   // register.
12710   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
12711   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
12712   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
12713       PPC::GPRCRegClass.contains(R.first))
12714     return std::make_pair(TRI->getMatchingSuperReg(R.first,
12715                             PPC::sub_32, &PPC::G8RCRegClass),
12716                           &PPC::G8RCRegClass);
12717 
12718   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
12719   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
12720     R.first = PPC::CR0;
12721     R.second = &PPC::CRRCRegClass;
12722   }
12723 
12724   return R;
12725 }
12726 
12727 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
12728 /// vector.  If it is invalid, don't add anything to Ops.
12729 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
12730                                                      std::string &Constraint,
12731                                                      std::vector<SDValue>&Ops,
12732                                                      SelectionDAG &DAG) const {
12733   SDValue Result;
12734 
12735   // Only support length 1 constraints.
12736   if (Constraint.length() > 1) return;
12737 
12738   char Letter = Constraint[0];
12739   switch (Letter) {
12740   default: break;
12741   case 'I':
12742   case 'J':
12743   case 'K':
12744   case 'L':
12745   case 'M':
12746   case 'N':
12747   case 'O':
12748   case 'P': {
12749     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
12750     if (!CST) return; // Must be an immediate to match.
12751     SDLoc dl(Op);
12752     int64_t Value = CST->getSExtValue();
12753     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
12754                          // numbers are printed as such.
12755     switch (Letter) {
12756     default: llvm_unreachable("Unknown constraint letter!");
12757     case 'I':  // "I" is a signed 16-bit constant.
12758       if (isInt<16>(Value))
12759         Result = DAG.getTargetConstant(Value, dl, TCVT);
12760       break;
12761     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
12762       if (isShiftedUInt<16, 16>(Value))
12763         Result = DAG.getTargetConstant(Value, dl, TCVT);
12764       break;
12765     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
12766       if (isShiftedInt<16, 16>(Value))
12767         Result = DAG.getTargetConstant(Value, dl, TCVT);
12768       break;
12769     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
12770       if (isUInt<16>(Value))
12771         Result = DAG.getTargetConstant(Value, dl, TCVT);
12772       break;
12773     case 'M':  // "M" is a constant that is greater than 31.
12774       if (Value > 31)
12775         Result = DAG.getTargetConstant(Value, dl, TCVT);
12776       break;
12777     case 'N':  // "N" is a positive constant that is an exact power of two.
12778       if (Value > 0 && isPowerOf2_64(Value))
12779         Result = DAG.getTargetConstant(Value, dl, TCVT);
12780       break;
12781     case 'O':  // "O" is the constant zero.
12782       if (Value == 0)
12783         Result = DAG.getTargetConstant(Value, dl, TCVT);
12784       break;
12785     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
12786       if (isInt<16>(-Value))
12787         Result = DAG.getTargetConstant(Value, dl, TCVT);
12788       break;
12789     }
12790     break;
12791   }
12792   }
12793 
12794   if (Result.getNode()) {
12795     Ops.push_back(Result);
12796     return;
12797   }
12798 
12799   // Handle standard constraint letters.
12800   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
12801 }
12802 
12803 // isLegalAddressingMode - Return true if the addressing mode represented
12804 // by AM is legal for this target, for a load/store of the specified type.
12805 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
12806                                               const AddrMode &AM, Type *Ty,
12807                                               unsigned AS) const {
12808   // PPC does not allow r+i addressing modes for vectors!
12809   if (Ty->isVectorTy() && AM.BaseOffs != 0)
12810     return false;
12811 
12812   // PPC allows a sign-extended 16-bit immediate field.
12813   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
12814     return false;
12815 
12816   // No global is ever allowed as a base.
12817   if (AM.BaseGV)
12818     return false;
12819 
12820   // PPC only support r+r,
12821   switch (AM.Scale) {
12822   case 0:  // "r+i" or just "i", depending on HasBaseReg.
12823     break;
12824   case 1:
12825     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
12826       return false;
12827     // Otherwise we have r+r or r+i.
12828     break;
12829   case 2:
12830     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
12831       return false;
12832     // Allow 2*r as r+r.
12833     break;
12834   default:
12835     // No other scales are supported.
12836     return false;
12837   }
12838 
12839   return true;
12840 }
12841 
12842 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
12843                                            SelectionDAG &DAG) const {
12844   MachineFunction &MF = DAG.getMachineFunction();
12845   MachineFrameInfo &MFI = MF.getFrameInfo();
12846   MFI.setReturnAddressIsTaken(true);
12847 
12848   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
12849     return SDValue();
12850 
12851   SDLoc dl(Op);
12852   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12853 
12854   // Make sure the function does not optimize away the store of the RA to
12855   // the stack.
12856   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
12857   FuncInfo->setLRStoreRequired();
12858   bool isPPC64 = Subtarget.isPPC64();
12859   auto PtrVT = getPointerTy(MF.getDataLayout());
12860 
12861   if (Depth > 0) {
12862     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
12863     SDValue Offset =
12864         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
12865                         isPPC64 ? MVT::i64 : MVT::i32);
12866     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
12867                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
12868                        MachinePointerInfo());
12869   }
12870 
12871   // Just load the return address off the stack.
12872   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
12873   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
12874                      MachinePointerInfo());
12875 }
12876 
12877 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
12878                                           SelectionDAG &DAG) const {
12879   SDLoc dl(Op);
12880   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12881 
12882   MachineFunction &MF = DAG.getMachineFunction();
12883   MachineFrameInfo &MFI = MF.getFrameInfo();
12884   MFI.setFrameAddressIsTaken(true);
12885 
12886   EVT PtrVT = getPointerTy(MF.getDataLayout());
12887   bool isPPC64 = PtrVT == MVT::i64;
12888 
12889   // Naked functions never have a frame pointer, and so we use r1. For all
12890   // other functions, this decision must be delayed until during PEI.
12891   unsigned FrameReg;
12892   if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
12893     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
12894   else
12895     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
12896 
12897   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
12898                                          PtrVT);
12899   while (Depth--)
12900     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
12901                             FrameAddr, MachinePointerInfo());
12902   return FrameAddr;
12903 }
12904 
12905 // FIXME? Maybe this could be a TableGen attribute on some registers and
12906 // this table could be generated automatically from RegInfo.
12907 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
12908                                               SelectionDAG &DAG) const {
12909   bool isPPC64 = Subtarget.isPPC64();
12910   bool isDarwinABI = Subtarget.isDarwinABI();
12911 
12912   if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
12913       (!isPPC64 && VT != MVT::i32))
12914     report_fatal_error("Invalid register global variable type");
12915 
12916   bool is64Bit = isPPC64 && VT == MVT::i64;
12917   unsigned Reg = StringSwitch<unsigned>(RegName)
12918                    .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
12919                    .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
12920                    .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
12921                                   (is64Bit ? PPC::X13 : PPC::R13))
12922                    .Default(0);
12923 
12924   if (Reg)
12925     return Reg;
12926   report_fatal_error("Invalid register name global variable");
12927 }
12928 
// Return true if a constant offset may be folded into the given global
// address node.  The PowerPC target isn't yet aware of offsets, so this
// always declines.
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}
12934 
12935 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12936                                            const CallInst &I,
12937                                            unsigned Intrinsic) const {
12938   switch (Intrinsic) {
12939   case Intrinsic::ppc_qpx_qvlfd:
12940   case Intrinsic::ppc_qpx_qvlfs:
12941   case Intrinsic::ppc_qpx_qvlfcd:
12942   case Intrinsic::ppc_qpx_qvlfcs:
12943   case Intrinsic::ppc_qpx_qvlfiwa:
12944   case Intrinsic::ppc_qpx_qvlfiwz:
12945   case Intrinsic::ppc_altivec_lvx:
12946   case Intrinsic::ppc_altivec_lvxl:
12947   case Intrinsic::ppc_altivec_lvebx:
12948   case Intrinsic::ppc_altivec_lvehx:
12949   case Intrinsic::ppc_altivec_lvewx:
12950   case Intrinsic::ppc_vsx_lxvd2x:
12951   case Intrinsic::ppc_vsx_lxvw4x: {
12952     EVT VT;
12953     switch (Intrinsic) {
12954     case Intrinsic::ppc_altivec_lvebx:
12955       VT = MVT::i8;
12956       break;
12957     case Intrinsic::ppc_altivec_lvehx:
12958       VT = MVT::i16;
12959       break;
12960     case Intrinsic::ppc_altivec_lvewx:
12961       VT = MVT::i32;
12962       break;
12963     case Intrinsic::ppc_vsx_lxvd2x:
12964       VT = MVT::v2f64;
12965       break;
12966     case Intrinsic::ppc_qpx_qvlfd:
12967       VT = MVT::v4f64;
12968       break;
12969     case Intrinsic::ppc_qpx_qvlfs:
12970       VT = MVT::v4f32;
12971       break;
12972     case Intrinsic::ppc_qpx_qvlfcd:
12973       VT = MVT::v2f64;
12974       break;
12975     case Intrinsic::ppc_qpx_qvlfcs:
12976       VT = MVT::v2f32;
12977       break;
12978     default:
12979       VT = MVT::v4i32;
12980       break;
12981     }
12982 
12983     Info.opc = ISD::INTRINSIC_W_CHAIN;
12984     Info.memVT = VT;
12985     Info.ptrVal = I.getArgOperand(0);
12986     Info.offset = -VT.getStoreSize()+1;
12987     Info.size = 2*VT.getStoreSize()-1;
12988     Info.align = 1;
12989     Info.vol = false;
12990     Info.readMem = true;
12991     Info.writeMem = false;
12992     return true;
12993   }
12994   case Intrinsic::ppc_qpx_qvlfda:
12995   case Intrinsic::ppc_qpx_qvlfsa:
12996   case Intrinsic::ppc_qpx_qvlfcda:
12997   case Intrinsic::ppc_qpx_qvlfcsa:
12998   case Intrinsic::ppc_qpx_qvlfiwaa:
12999   case Intrinsic::ppc_qpx_qvlfiwza: {
13000     EVT VT;
13001     switch (Intrinsic) {
13002     case Intrinsic::ppc_qpx_qvlfda:
13003       VT = MVT::v4f64;
13004       break;
13005     case Intrinsic::ppc_qpx_qvlfsa:
13006       VT = MVT::v4f32;
13007       break;
13008     case Intrinsic::ppc_qpx_qvlfcda:
13009       VT = MVT::v2f64;
13010       break;
13011     case Intrinsic::ppc_qpx_qvlfcsa:
13012       VT = MVT::v2f32;
13013       break;
13014     default:
13015       VT = MVT::v4i32;
13016       break;
13017     }
13018 
13019     Info.opc = ISD::INTRINSIC_W_CHAIN;
13020     Info.memVT = VT;
13021     Info.ptrVal = I.getArgOperand(0);
13022     Info.offset = 0;
13023     Info.size = VT.getStoreSize();
13024     Info.align = 1;
13025     Info.vol = false;
13026     Info.readMem = true;
13027     Info.writeMem = false;
13028     return true;
13029   }
13030   case Intrinsic::ppc_qpx_qvstfd:
13031   case Intrinsic::ppc_qpx_qvstfs:
13032   case Intrinsic::ppc_qpx_qvstfcd:
13033   case Intrinsic::ppc_qpx_qvstfcs:
13034   case Intrinsic::ppc_qpx_qvstfiw:
13035   case Intrinsic::ppc_altivec_stvx:
13036   case Intrinsic::ppc_altivec_stvxl:
13037   case Intrinsic::ppc_altivec_stvebx:
13038   case Intrinsic::ppc_altivec_stvehx:
13039   case Intrinsic::ppc_altivec_stvewx:
13040   case Intrinsic::ppc_vsx_stxvd2x:
13041   case Intrinsic::ppc_vsx_stxvw4x: {
13042     EVT VT;
13043     switch (Intrinsic) {
13044     case Intrinsic::ppc_altivec_stvebx:
13045       VT = MVT::i8;
13046       break;
13047     case Intrinsic::ppc_altivec_stvehx:
13048       VT = MVT::i16;
13049       break;
13050     case Intrinsic::ppc_altivec_stvewx:
13051       VT = MVT::i32;
13052       break;
13053     case Intrinsic::ppc_vsx_stxvd2x:
13054       VT = MVT::v2f64;
13055       break;
13056     case Intrinsic::ppc_qpx_qvstfd:
13057       VT = MVT::v4f64;
13058       break;
13059     case Intrinsic::ppc_qpx_qvstfs:
13060       VT = MVT::v4f32;
13061       break;
13062     case Intrinsic::ppc_qpx_qvstfcd:
13063       VT = MVT::v2f64;
13064       break;
13065     case Intrinsic::ppc_qpx_qvstfcs:
13066       VT = MVT::v2f32;
13067       break;
13068     default:
13069       VT = MVT::v4i32;
13070       break;
13071     }
13072 
13073     Info.opc = ISD::INTRINSIC_VOID;
13074     Info.memVT = VT;
13075     Info.ptrVal = I.getArgOperand(1);
13076     Info.offset = -VT.getStoreSize()+1;
13077     Info.size = 2*VT.getStoreSize()-1;
13078     Info.align = 1;
13079     Info.vol = false;
13080     Info.readMem = false;
13081     Info.writeMem = true;
13082     return true;
13083   }
13084   case Intrinsic::ppc_qpx_qvstfda:
13085   case Intrinsic::ppc_qpx_qvstfsa:
13086   case Intrinsic::ppc_qpx_qvstfcda:
13087   case Intrinsic::ppc_qpx_qvstfcsa:
13088   case Intrinsic::ppc_qpx_qvstfiwa: {
13089     EVT VT;
13090     switch (Intrinsic) {
13091     case Intrinsic::ppc_qpx_qvstfda:
13092       VT = MVT::v4f64;
13093       break;
13094     case Intrinsic::ppc_qpx_qvstfsa:
13095       VT = MVT::v4f32;
13096       break;
13097     case Intrinsic::ppc_qpx_qvstfcda:
13098       VT = MVT::v2f64;
13099       break;
13100     case Intrinsic::ppc_qpx_qvstfcsa:
13101       VT = MVT::v2f32;
13102       break;
13103     default:
13104       VT = MVT::v4i32;
13105       break;
13106     }
13107 
13108     Info.opc = ISD::INTRINSIC_VOID;
13109     Info.memVT = VT;
13110     Info.ptrVal = I.getArgOperand(1);
13111     Info.offset = 0;
13112     Info.size = VT.getStoreSize();
13113     Info.align = 1;
13114     Info.vol = false;
13115     Info.readMem = false;
13116     Info.writeMem = true;
13117     return true;
13118   }
13119   default:
13120     break;
13121   }
13122 
13123   return false;
13124 }
13125 
13126 /// getOptimalMemOpType - Returns the target specific optimal type for load
13127 /// and store operations as a result of memset, memcpy, and memmove
13128 /// lowering. If DstAlign is zero that means it's safe to destination
13129 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
13130 /// means there isn't a need to check it against alignment requirement,
13131 /// probably because the source does not need to be loaded. If 'IsMemset' is
13132 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
13133 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
13134 /// source is constant so it does not need to be loaded.
13135 /// It returns EVT::Other if the type should be determined using generic
13136 /// target-independent logic.
13137 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
13138                                            unsigned DstAlign, unsigned SrcAlign,
13139                                            bool IsMemset, bool ZeroMemset,
13140                                            bool MemcpyStrSrc,
13141                                            MachineFunction &MF) const {
13142   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
13143     const Function *F = MF.getFunction();
13144     // When expanding a memset, require at least two QPX instructions to cover
13145     // the cost of loading the value to be stored from the constant pool.
13146     if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
13147        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
13148         !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
13149       return MVT::v4f64;
13150     }
13151 
13152     // We should use Altivec/VSX loads and stores when available. For unaligned
13153     // addresses, unaligned VSX loads are only fast starting with the P8.
13154     if (Subtarget.hasAltivec() && Size >= 16 &&
13155         (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
13156          ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
13157       return MVT::v4i32;
13158   }
13159 
13160   if (Subtarget.isPPC64()) {
13161     return MVT::i64;
13162   }
13163 
13164   return MVT::i32;
13165 }
13166 
13167 /// \brief Returns true if it is beneficial to convert a load of a constant
13168 /// to just the constant itself.
13169 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
13170                                                           Type *Ty) const {
13171   assert(Ty->isIntegerTy());
13172 
13173   unsigned BitSize = Ty->getPrimitiveSizeInBits();
13174   return !(BitSize == 0 || BitSize > 64);
13175 }
13176 
13177 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
13178   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
13179     return false;
13180   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
13181   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
13182   return NumBits1 == 64 && NumBits2 == 32;
13183 }
13184 
13185 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
13186   if (!VT1.isInteger() || !VT2.isInteger())
13187     return false;
13188   unsigned NumBits1 = VT1.getSizeInBits();
13189   unsigned NumBits2 = VT2.getSizeInBits();
13190   return NumBits1 == 64 && NumBits2 == 32;
13191 }
13192 
13193 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
13194   // Generally speaking, zexts are not free, but they are free when they can be
13195   // folded with other operations.
13196   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
13197     EVT MemVT = LD->getMemoryVT();
13198     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
13199          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
13200         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
13201          LD->getExtensionType() == ISD::ZEXTLOAD))
13202       return true;
13203   }
13204 
13205   // FIXME: Add other cases...
13206   //  - 32-bit shifts with a zext to i64
13207   //  - zext after ctlz, bswap, etc.
13208   //  - zext after and by a constant mask
13209 
13210   return TargetLowering::isZExtFree(Val, VT2);
13211 }
13212 
// Floating-point extension is reported as free for every FP type queried.
// Callers must only pass floating-point types.
bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return true;
}
13217 
13218 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
13219   return isInt<16>(Imm) || isUInt<16>(Imm);
13220 }
13221 
13222 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
13223   return isInt<16>(Imm) || isUInt<16>(Imm);
13224 }
13225 
13226 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
13227                                                        unsigned,
13228                                                        unsigned,
13229                                                        bool *Fast) const {
13230   if (DisablePPCUnaligned)
13231     return false;
13232 
13233   // PowerPC supports unaligned memory access for simple non-vector types.
13234   // Although accessing unaligned addresses is not as efficient as accessing
13235   // aligned addresses, it is generally more efficient than manual expansion,
13236   // and generally only traps for software emulation when crossing page
13237   // boundaries.
13238 
13239   if (!VT.isSimple())
13240     return false;
13241 
13242   if (VT.getSimpleVT().isVector()) {
13243     if (Subtarget.hasVSX()) {
13244       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
13245           VT != MVT::v4f32 && VT != MVT::v4i32)
13246         return false;
13247     } else {
13248       return false;
13249     }
13250   }
13251 
13252   if (VT == MVT::ppcf128)
13253     return false;
13254 
13255   if (Fast)
13256     *Fast = true;
13257 
13258   return true;
13259 }
13260 
13261 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
13262   VT = VT.getScalarType();
13263 
13264   if (!VT.isSimple())
13265     return false;
13266 
13267   switch (VT.getSimpleVT().SimpleTy) {
13268   case MVT::f32:
13269   case MVT::f64:
13270     return true;
13271   default:
13272     break;
13273   }
13274 
13275   return false;
13276 }
13277 
// Return the list of registers that may be used as scratch across calls,
// as a zero-terminated array.
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}
13290 
13291 unsigned PPCTargetLowering::getExceptionPointerRegister(
13292     const Constant *PersonalityFn) const {
13293   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
13294 }
13295 
13296 unsigned PPCTargetLowering::getExceptionSelectorRegister(
13297     const Constant *PersonalityFn) const {
13298   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
13299 }
13300 
// Decide whether a BUILD_VECTOR of type VT with DefinedValues non-undef
// elements should be expanded using shuffles instead of through the stack.
bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
                     EVT VT , unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  // With VSX or QPX, shuffles are always preferable here.
  if (Subtarget.hasVSX() || Subtarget.hasQPX())
    return true;

  // Otherwise defer to the generic heuristic.
  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
13312 
13313 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
13314   if (DisableILPPref || Subtarget.enableMachineScheduler())
13315     return TargetLowering::getSchedulingPreference(N);
13316 
13317   return Sched::ILP;
13318 }
13319 
// Create a fast isel object.  Simply forwards to the PPC fast-isel factory.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}
13326 
13327 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
13328   if (Subtarget.isDarwinABI()) return;
13329   if (!Subtarget.isPPC64()) return;
13330 
13331   // Update IsSplitCSR in PPCFunctionInfo
13332   PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
13333   PFI->setIsSplitCSR(true);
13334 }
13335 
// insertCopiesSplitCSR - On entry, copy each callee-saved register handled
// via copy into a fresh virtual register; before every return, copy the
// value back into the physical register.  Used with split-CSR conventions
// (see the CXX_FAST_TLS note below).
void PPCTargetLowering::insertCopiesSplitCSR(
  MachineBasicBlock *Entry,
  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  // Null when this function has no CSRs handled via copy; nothing to do.
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  // The register list is zero-terminated.
  for (const MCPhysReg *I = IStart; *I; ++I) {
    // Choose the register class for the virtual copy of this physreg.
    const TargetRegisterClass *RC = nullptr;
    if (PPC::G8RCRegClass.contains(*I))
      RC = &PPC::G8RCRegClass;
    else if (PPC::F8RCRegClass.contains(*I))
      RC = &PPC::F8RCRegClass;
    else if (PPC::CRRCRegClass.contains(*I))
      RC = &PPC::CRRCRegClass;
    else if (PPC::VRRCRegClass.contains(*I))
      RC = &PPC::VRRCRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    unsigned NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction()->hasFnAttribute(
             Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
      .addReg(*I);

    // Insert the copy-back instructions right before the terminator
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
        .addReg(NewVR);
  }
}
13380 
13381 // Override to enable LOAD_STACK_GUARD lowering on Linux.
13382 bool PPCTargetLowering::useLoadStackGuardNode() const {
13383   if (!Subtarget.isTargetLinux())
13384     return TargetLowering::useLoadStackGuardNode();
13385   return true;
13386 }
13387 
13388 // Override to disable global variable loading on Linux.
13389 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
13390   if (!Subtarget.isTargetLinux())
13391     return TargetLowering::insertSSPDeclarations(M);
13392 }
13393 
13394 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
13395   if (!VT.isSimple() || !Subtarget.hasVSX())
13396     return false;
13397 
13398   switch(VT.getSimpleVT().SimpleTy) {
13399   default:
13400     // For FP types that are currently not supported by PPC backend, return
13401     // false. Examples: f16, f80.
13402     return false;
13403   case MVT::f32:
13404   case MVT::f64:
13405   case MVT::ppcf128:
13406     return Imm.isPosZero();
13407   }
13408 }
13409 
13410 // For vector shift operation op, fold
13411 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
13412 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
13413                                   SelectionDAG &DAG) {
13414   SDValue N0 = N->getOperand(0);
13415   SDValue N1 = N->getOperand(1);
13416   EVT VT = N0.getValueType();
13417   unsigned OpSizeInBits = VT.getScalarSizeInBits();
13418   unsigned Opcode = N->getOpcode();
13419   unsigned TargetOpcode;
13420 
13421   switch (Opcode) {
13422   default:
13423     llvm_unreachable("Unexpected shift operation");
13424   case ISD::SHL:
13425     TargetOpcode = PPCISD::SHL;
13426     break;
13427   case ISD::SRL:
13428     TargetOpcode = PPCISD::SRL;
13429     break;
13430   case ISD::SRA:
13431     TargetOpcode = PPCISD::SRA;
13432     break;
13433   }
13434 
13435   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
13436       N1->getOpcode() == ISD::AND)
13437     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
13438       if (Mask->getZExtValue() == OpSizeInBits - 1)
13439         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
13440 
13441   return SDValue();
13442 }
13443 
13444 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
13445   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13446     return Value;
13447 
13448   return SDValue();
13449 }
13450 
13451 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
13452   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13453     return Value;
13454 
13455   return SDValue();
13456 }
13457 
13458 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
13459   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13460     return Value;
13461 
13462   return SDValue();
13463 }
13464