1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PPCISelLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPCISelLowering.h"
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPC.h"
17 #include "PPCCCState.h"
18 #include "PPCCallingConv.h"
19 #include "PPCFrameLowering.h"
20 #include "PPCInstrInfo.h"
21 #include "PPCMachineFunctionInfo.h"
22 #include "PPCPerfectShuffle.h"
23 #include "PPCRegisterInfo.h"
24 #include "PPCSubtarget.h"
25 #include "PPCTargetMachine.h"
26 #include "llvm/ADT/APFloat.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/None.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/SmallPtrSet.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/ADT/StringSwitch.h"
38 #include "llvm/CodeGen/CallingConvLower.h"
39 #include "llvm/CodeGen/ISDOpcodes.h"
40 #include "llvm/CodeGen/MachineBasicBlock.h"
41 #include "llvm/CodeGen/MachineFrameInfo.h"
42 #include "llvm/CodeGen/MachineFunction.h"
43 #include "llvm/CodeGen/MachineInstr.h"
44 #include "llvm/CodeGen/MachineInstrBuilder.h"
45 #include "llvm/CodeGen/MachineJumpTableInfo.h"
46 #include "llvm/CodeGen/MachineLoopInfo.h"
47 #include "llvm/CodeGen/MachineMemOperand.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/MachineValueType.h"
51 #include "llvm/CodeGen/RuntimeLibcalls.h"
52 #include "llvm/CodeGen/SelectionDAG.h"
53 #include "llvm/CodeGen/SelectionDAGNodes.h"
54 #include "llvm/CodeGen/ValueTypes.h"
55 #include "llvm/IR/CallSite.h"
56 #include "llvm/IR/CallingConv.h"
57 #include "llvm/IR/Constant.h"
58 #include "llvm/IR/Constants.h"
59 #include "llvm/IR/DataLayout.h"
60 #include "llvm/IR/DebugLoc.h"
61 #include "llvm/IR/DerivedTypes.h"
62 #include "llvm/IR/Function.h"
63 #include "llvm/IR/GlobalValue.h"
64 #include "llvm/IR/IRBuilder.h"
65 #include "llvm/IR/Instructions.h"
66 #include "llvm/IR/Intrinsics.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/Type.h"
69 #include "llvm/IR/Use.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/MC/MCExpr.h"
72 #include "llvm/MC/MCRegisterInfo.h"
73 #include "llvm/Support/AtomicOrdering.h"
74 #include "llvm/Support/BranchProbability.h"
75 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/CodeGen.h"
77 #include "llvm/Support/CommandLine.h"
78 #include "llvm/Support/Compiler.h"
79 #include "llvm/Support/Debug.h"
80 #include "llvm/Support/ErrorHandling.h"
81 #include "llvm/Support/Format.h"
82 #include "llvm/Support/KnownBits.h"
83 #include "llvm/Support/MathExtras.h"
84 #include "llvm/Support/raw_ostream.h"
85 #include "llvm/Target/TargetInstrInfo.h"
86 #include "llvm/Target/TargetLowering.h"
87 #include "llvm/Target/TargetMachine.h"
88 #include "llvm/Target/TargetOptions.h"
89 #include "llvm/Target/TargetRegisterInfo.h"
90 #include <algorithm>
91 #include <cassert>
92 #include <cstdint>
93 #include <iterator>
94 #include <list>
95 #include <utility>
96 #include <vector>
97 
98 using namespace llvm;
99 
100 #define DEBUG_TYPE "ppc-lowering"
101 
// Command-line knobs for selectively disabling PPC-specific codegen
// features. These exist for debugging and performance triage; all are
// hidden from the standard -help output (cl::Hidden).

// When set, suppresses formation of pre-increment (update-form) load/store
// instructions (e.g. lwzu/stwu).
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

// When set, keeps the default scheduling preference instead of switching
// the node scheduling preference to ILP.
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

// When set, unaligned memory accesses are treated as unsupported, forcing
// the legalizer to split/realign them.
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// When set, disables sibling-call (tail-call to a function with a
// compatible signature) optimization.
static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

// Codegen statistics, reported when LLVM is built with statistics enabled
// and run with -stats.
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

// Flag defined elsewhere that works around a glue bug with ANDI.
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
119 
120 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
121                                      const PPCSubtarget &STI)
122     : TargetLowering(TM), Subtarget(STI) {
123   // Use _setjmp/_longjmp instead of setjmp/longjmp.
124   setUseUnderscoreSetJmp(true);
125   setUseUnderscoreLongJmp(true);
126 
127   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
128   // arguments are at least 4/8 bytes aligned.
129   bool isPPC64 = Subtarget.isPPC64();
130   setMinStackArgumentAlignment(isPPC64 ? 8:4);
131 
132   // Set up the register classes.
133   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
134   if (!useSoftFloat()) {
135     addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
136     addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
137   }
138 
139   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
140   for (MVT VT : MVT::integer_valuetypes()) {
141     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
142     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
143   }
144 
145   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
146 
147   // PowerPC has pre-inc load and store's.
148   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
149   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
150   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
151   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
152   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
153   setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
154   setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
155   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
156   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
157   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
158   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
159   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
160   setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
161   setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
162 
163   if (Subtarget.useCRBits()) {
164     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
165 
166     if (isPPC64 || Subtarget.hasFPCVT()) {
167       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
168       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
169                          isPPC64 ? MVT::i64 : MVT::i32);
170       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
171       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
172                         isPPC64 ? MVT::i64 : MVT::i32);
173     } else {
174       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
175       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
176     }
177 
178     // PowerPC does not support direct load/store of condition registers.
179     setOperationAction(ISD::LOAD, MVT::i1, Custom);
180     setOperationAction(ISD::STORE, MVT::i1, Custom);
181 
182     // FIXME: Remove this once the ANDI glue bug is fixed:
183     if (ANDIGlueBug)
184       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
185 
186     for (MVT VT : MVT::integer_valuetypes()) {
187       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
188       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
189       setTruncStoreAction(VT, MVT::i1, Expand);
190     }
191 
192     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
193   }
194 
195   // This is used in the ppcf128->int sequence.  Note it has different semantics
196   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
197   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
198 
199   // We do not currently implement these libm ops for PowerPC.
200   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
201   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
202   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
203   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
204   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
205   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
206 
207   // PowerPC has no SREM/UREM instructions unless we are on P9
208   // On P9 we may use a hardware instruction to compute the remainder.
209   // The instructions are not legalized directly because in the cases where the
210   // result of both the remainder and the division is required it is more
211   // efficient to compute the remainder from the result of the division rather
212   // than use the remainder instruction.
213   if (Subtarget.isISA3_0()) {
214     setOperationAction(ISD::SREM, MVT::i32, Custom);
215     setOperationAction(ISD::UREM, MVT::i32, Custom);
216     setOperationAction(ISD::SREM, MVT::i64, Custom);
217     setOperationAction(ISD::UREM, MVT::i64, Custom);
218   } else {
219     setOperationAction(ISD::SREM, MVT::i32, Expand);
220     setOperationAction(ISD::UREM, MVT::i32, Expand);
221     setOperationAction(ISD::SREM, MVT::i64, Expand);
222     setOperationAction(ISD::UREM, MVT::i64, Expand);
223   }
224 
225   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
226   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
227   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
228   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
229   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
230   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
231   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
232   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
233   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
234 
235   // We don't support sin/cos/sqrt/fmod/pow
236   setOperationAction(ISD::FSIN , MVT::f64, Expand);
237   setOperationAction(ISD::FCOS , MVT::f64, Expand);
238   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
239   setOperationAction(ISD::FREM , MVT::f64, Expand);
240   setOperationAction(ISD::FPOW , MVT::f64, Expand);
241   setOperationAction(ISD::FMA  , MVT::f64, Legal);
242   setOperationAction(ISD::FSIN , MVT::f32, Expand);
243   setOperationAction(ISD::FCOS , MVT::f32, Expand);
244   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
245   setOperationAction(ISD::FREM , MVT::f32, Expand);
246   setOperationAction(ISD::FPOW , MVT::f32, Expand);
247   setOperationAction(ISD::FMA  , MVT::f32, Legal);
248 
249   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
250 
251   // If we're enabling GP optimizations, use hardware square root
252   if (!Subtarget.hasFSQRT() &&
253       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
254         Subtarget.hasFRE()))
255     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
256 
257   if (!Subtarget.hasFSQRT() &&
258       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
259         Subtarget.hasFRES()))
260     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
261 
262   if (Subtarget.hasFCPSGN()) {
263     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
264     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
265   } else {
266     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
267     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
268   }
269 
270   if (Subtarget.hasFPRND()) {
271     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
272     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
273     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
274     setOperationAction(ISD::FROUND, MVT::f64, Legal);
275 
276     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
277     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
278     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
279     setOperationAction(ISD::FROUND, MVT::f32, Legal);
280   }
281 
282   // PowerPC does not have BSWAP
  // CTPOP or CTTZ were introduced in P8/P9 respectively
284   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
285   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
286   if (Subtarget.isISA3_0()) {
287     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
288     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
289   } else {
290     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
291     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
292   }
293 
294   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
295     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
296     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
297   } else {
298     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
299     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
300   }
301 
302   // PowerPC does not have ROTR
303   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
304   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
305 
306   if (!Subtarget.useCRBits()) {
307     // PowerPC does not have Select
308     setOperationAction(ISD::SELECT, MVT::i32, Expand);
309     setOperationAction(ISD::SELECT, MVT::i64, Expand);
310     setOperationAction(ISD::SELECT, MVT::f32, Expand);
311     setOperationAction(ISD::SELECT, MVT::f64, Expand);
312   }
313 
314   // PowerPC wants to turn select_cc of FP into fsel when possible.
315   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
316   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
317 
318   // PowerPC wants to optimize integer setcc a bit
319   if (!Subtarget.useCRBits())
320     setOperationAction(ISD::SETCC, MVT::i32, Custom);
321 
322   // PowerPC does not have BRCOND which requires SetCC
323   if (!Subtarget.useCRBits())
324     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
325 
326   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
327 
328   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
329   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
330 
331   // PowerPC does not have [U|S]INT_TO_FP
332   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
333   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
334 
335   if (Subtarget.hasDirectMove() && isPPC64) {
336     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
337     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
338     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
339     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
340   } else {
341     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
342     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
343     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
344     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
345   }
346 
347   // We cannot sextinreg(i1).  Expand to shifts.
348   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
349 
350   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
351   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no
353   // other SjLj exception interfaces are implemented and please don't build
354   // your own exception handling based on them.
355   // LLVM/Clang supports zero-cost DWARF exception handling.
356   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
357   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
358 
359   // We want to legalize GlobalAddress and ConstantPool nodes into the
360   // appropriate instructions to materialize the address.
361   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
362   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
363   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
364   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
365   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
366   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
367   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
368   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
369   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
370   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
371 
372   // TRAP is legal.
373   setOperationAction(ISD::TRAP, MVT::Other, Legal);
374 
375   // TRAMPOLINE is custom lowered.
376   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
377   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
378 
379   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
380   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
381 
382   if (Subtarget.isSVR4ABI()) {
383     if (isPPC64) {
384       // VAARG always uses double-word chunks, so promote anything smaller.
385       setOperationAction(ISD::VAARG, MVT::i1, Promote);
386       AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
387       setOperationAction(ISD::VAARG, MVT::i8, Promote);
388       AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
389       setOperationAction(ISD::VAARG, MVT::i16, Promote);
390       AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
391       setOperationAction(ISD::VAARG, MVT::i32, Promote);
392       AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
393       setOperationAction(ISD::VAARG, MVT::Other, Expand);
394     } else {
395       // VAARG is custom lowered with the 32-bit SVR4 ABI.
396       setOperationAction(ISD::VAARG, MVT::Other, Custom);
397       setOperationAction(ISD::VAARG, MVT::i64, Custom);
398     }
399   } else
400     setOperationAction(ISD::VAARG, MVT::Other, Expand);
401 
402   if (Subtarget.isSVR4ABI() && !isPPC64)
403     // VACOPY is custom lowered with the 32-bit SVR4 ABI.
404     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
405   else
406     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
407 
408   // Use the default implementation.
409   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
410   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
411   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
412   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
413   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
414   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
415   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
416   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
417   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
418 
419   // We want to custom lower some of our intrinsics.
420   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
421 
422   // To handle counter-based loop conditions.
423   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
424 
425   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
426   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
427   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
428   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
429 
430   // Comparisons that require checking two conditions.
431   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
432   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
433   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
434   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
435   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
436   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
437   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
438   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
439   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
440   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
441   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
442   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
443 
444   if (Subtarget.has64BitSupport()) {
445     // They also have instructions for converting between i64 and fp.
446     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
447     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
448     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
449     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
450     // This is just the low 32 bits of a (signed) fp->i64 conversion.
451     // We cannot do this with Promote because i64 is not a legal type.
452     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
453 
454     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
455       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
456   } else {
457     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
458     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
459   }
460 
461   // With the instructions enabled under FPCVT, we can do everything.
462   if (Subtarget.hasFPCVT()) {
463     if (Subtarget.has64BitSupport()) {
464       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
465       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
466       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
467       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
468     }
469 
470     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
471     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
472     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
473     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
474   }
475 
476   if (Subtarget.use64BitRegs()) {
477     // 64-bit PowerPC implementations can support i64 types directly
478     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
479     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
480     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
481     // 64-bit PowerPC wants to expand i128 shifts itself.
482     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
483     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
484     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
485   } else {
486     // 32-bit PowerPC wants to expand i64 shifts itself.
487     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
488     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
489     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
490   }
491 
492   if (Subtarget.hasAltivec()) {
493     // First set operation action for all vector types to expand. Then we
494     // will selectively turn on ones that can be effectively codegen'd.
495     for (MVT VT : MVT::vector_valuetypes()) {
496       // add/sub are legal for all supported vector VT's.
497       setOperationAction(ISD::ADD, VT, Legal);
498       setOperationAction(ISD::SUB, VT, Legal);
499 
500       // Vector instructions introduced in P8
501       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
502         setOperationAction(ISD::CTPOP, VT, Legal);
503         setOperationAction(ISD::CTLZ, VT, Legal);
504       }
505       else {
506         setOperationAction(ISD::CTPOP, VT, Expand);
507         setOperationAction(ISD::CTLZ, VT, Expand);
508       }
509 
510       // Vector instructions introduced in P9
511       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
512         setOperationAction(ISD::CTTZ, VT, Legal);
513       else
514         setOperationAction(ISD::CTTZ, VT, Expand);
515 
516       // We promote all shuffles to v16i8.
517       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
518       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
519 
520       // We promote all non-typed operations to v4i32.
521       setOperationAction(ISD::AND   , VT, Promote);
522       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
523       setOperationAction(ISD::OR    , VT, Promote);
524       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
525       setOperationAction(ISD::XOR   , VT, Promote);
526       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
527       setOperationAction(ISD::LOAD  , VT, Promote);
528       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
529       setOperationAction(ISD::SELECT, VT, Promote);
530       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
531       setOperationAction(ISD::SELECT_CC, VT, Promote);
532       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
533       setOperationAction(ISD::STORE, VT, Promote);
534       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
535 
536       // No other operations are legal.
537       setOperationAction(ISD::MUL , VT, Expand);
538       setOperationAction(ISD::SDIV, VT, Expand);
539       setOperationAction(ISD::SREM, VT, Expand);
540       setOperationAction(ISD::UDIV, VT, Expand);
541       setOperationAction(ISD::UREM, VT, Expand);
542       setOperationAction(ISD::FDIV, VT, Expand);
543       setOperationAction(ISD::FREM, VT, Expand);
544       setOperationAction(ISD::FNEG, VT, Expand);
545       setOperationAction(ISD::FSQRT, VT, Expand);
546       setOperationAction(ISD::FLOG, VT, Expand);
547       setOperationAction(ISD::FLOG10, VT, Expand);
548       setOperationAction(ISD::FLOG2, VT, Expand);
549       setOperationAction(ISD::FEXP, VT, Expand);
550       setOperationAction(ISD::FEXP2, VT, Expand);
551       setOperationAction(ISD::FSIN, VT, Expand);
552       setOperationAction(ISD::FCOS, VT, Expand);
553       setOperationAction(ISD::FABS, VT, Expand);
554       setOperationAction(ISD::FFLOOR, VT, Expand);
555       setOperationAction(ISD::FCEIL,  VT, Expand);
556       setOperationAction(ISD::FTRUNC, VT, Expand);
557       setOperationAction(ISD::FRINT,  VT, Expand);
558       setOperationAction(ISD::FNEARBYINT, VT, Expand);
559       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
560       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
561       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
562       setOperationAction(ISD::MULHU, VT, Expand);
563       setOperationAction(ISD::MULHS, VT, Expand);
564       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
565       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
566       setOperationAction(ISD::UDIVREM, VT, Expand);
567       setOperationAction(ISD::SDIVREM, VT, Expand);
568       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
569       setOperationAction(ISD::FPOW, VT, Expand);
570       setOperationAction(ISD::BSWAP, VT, Expand);
571       setOperationAction(ISD::VSELECT, VT, Expand);
572       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
573       setOperationAction(ISD::ROTL, VT, Expand);
574       setOperationAction(ISD::ROTR, VT, Expand);
575 
576       for (MVT InnerVT : MVT::vector_valuetypes()) {
577         setTruncStoreAction(VT, InnerVT, Expand);
578         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
579         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
580         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
581       }
582     }
583 
584     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
585     // with merges, splats, etc.
586     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
587 
588     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
589     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
590     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
591     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
592     setOperationAction(ISD::SELECT, MVT::v4i32,
593                        Subtarget.useCRBits() ? Legal : Expand);
594     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
595     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
596     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
597     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
598     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
599     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
600     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
601     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
602     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
603 
604     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
605     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
606     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
607     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
608 
609     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
610     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
611 
612     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
613       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
614       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
615     }
616 
617     if (Subtarget.hasP8Altivec())
618       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
619     else
620       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
621 
622     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
623     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
624 
625     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
626     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
627 
628     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
629     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
630     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
631     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
632 
633     // Altivec does not contain unordered floating-point compare instructions
634     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
635     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
636     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
637     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
638 
639     if (Subtarget.hasVSX()) {
640       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
641       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
642       if (Subtarget.hasP8Vector()) {
643         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
644         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
645       }
646       if (Subtarget.hasDirectMove() && isPPC64) {
647         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
648         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
649         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
650         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
651         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
652         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
653         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
654         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
655       }
656       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
657 
658       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
659       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
660       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
661       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
662       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
663 
664       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
665 
666       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
667       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
668 
669       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
670       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
671 
672       setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
673       setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
674       setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
675       setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
676       setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
677 
678       // Share the Altivec comparison restrictions.
679       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
680       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
681       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
682       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
683 
684       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
685       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
686 
687       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
688 
689       if (Subtarget.hasP8Vector())
690         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
691 
692       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
693 
694       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
695       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
696       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
697 
698       if (Subtarget.hasP8Altivec()) {
699         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
700         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
701         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
702 
703         // 128 bit shifts can be accomplished via 3 instructions for SHL and
704         // SRL, but not for SRA because of the instructions available:
705         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
706         // doing
707         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
708         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
709         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
710 
711         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
712       }
713       else {
714         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
715         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
716         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
717 
718         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
719 
720         // VSX v2i64 only supports non-arithmetic operations.
721         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
722         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
723       }
724 
725       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
726       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
727       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
728       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
729 
730       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
731 
732       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
733       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
734       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
735       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
736 
737       // Vector operation legalization checks the result type of
738       // SIGN_EXTEND_INREG, overall legalization checks the inner type.
739       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
740       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
741       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
742       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
743 
744       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
745       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
746       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
747       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
748 
749       if (Subtarget.hasDirectMove())
750         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
751       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
752 
753       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
754     }
755 
756     if (Subtarget.hasP8Altivec()) {
757       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
758       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
759     }
760 
761     if (Subtarget.hasP9Vector()) {
762       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
763       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
764 
765       // 128 bit shifts can be accomplished via 3 instructions for SHL and
766       // SRL, but not for SRA because of the instructions available:
767       // VS{RL} and VS{RL}O.
768       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
769       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
770       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
771     }
772   }
773 
774   if (Subtarget.hasQPX()) {
775     setOperationAction(ISD::FADD, MVT::v4f64, Legal);
776     setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
777     setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
778     setOperationAction(ISD::FREM, MVT::v4f64, Expand);
779 
780     setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
781     setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
782 
783     setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
784     setOperationAction(ISD::STORE , MVT::v4f64, Custom);
785 
786     setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
787     setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
788 
789     if (!Subtarget.useCRBits())
790       setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
791     setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
792 
793     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
794     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
795     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
796     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
797     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
798     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
799     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
800 
801     setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
802     setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
803 
804     setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
805     setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
806     setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
807 
808     setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
809     setOperationAction(ISD::FABS , MVT::v4f64, Legal);
810     setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
811     setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
812     setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
813     setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
814     setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
815     setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
816     setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
817     setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
818 
819     setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
820     setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
821 
822     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
823     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
824 
825     addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
826 
827     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
828     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
829     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
830     setOperationAction(ISD::FREM, MVT::v4f32, Expand);
831 
832     setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
833     setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
834 
835     setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
836     setOperationAction(ISD::STORE , MVT::v4f32, Custom);
837 
838     if (!Subtarget.useCRBits())
839       setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
840     setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
841 
842     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
843     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
844     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
845     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
846     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
847     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
848     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
849 
850     setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
851     setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
852 
853     setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
854     setOperationAction(ISD::FABS , MVT::v4f32, Legal);
855     setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
856     setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
857     setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
858     setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
859     setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
860     setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
861     setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
862     setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
863 
864     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
865     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
866 
867     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
868     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
869 
870     addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
871 
872     setOperationAction(ISD::AND , MVT::v4i1, Legal);
873     setOperationAction(ISD::OR , MVT::v4i1, Legal);
874     setOperationAction(ISD::XOR , MVT::v4i1, Legal);
875 
876     if (!Subtarget.useCRBits())
877       setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
878     setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
879 
880     setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
881     setOperationAction(ISD::STORE , MVT::v4i1, Custom);
882 
883     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
884     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
885     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
886     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
887     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
888     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
889     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
890 
891     setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
892     setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
893 
894     addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
895 
896     setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
897     setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
898     setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
899     setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
900 
901     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
902     setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
903     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
904     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
905 
906     setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
907     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
908 
909     // These need to set FE_INEXACT, and so cannot be vectorized here.
910     setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
911     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
912 
913     if (TM.Options.UnsafeFPMath) {
914       setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
915       setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
916 
917       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
918       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
919     } else {
920       setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
921       setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
922 
923       setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
924       setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
925     }
926   }
927 
928   if (Subtarget.has64BitSupport())
929     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
930 
931   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
932 
933   if (!isPPC64) {
934     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
935     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
936   }
937 
938   setBooleanContents(ZeroOrOneBooleanContent);
939 
940   if (Subtarget.hasAltivec()) {
941     // Altivec instructions set fields to all zeros or all ones.
942     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
943   }
944 
945   if (!isPPC64) {
946     // These libcalls are not available in 32-bit.
947     setLibcallName(RTLIB::SHL_I128, nullptr);
948     setLibcallName(RTLIB::SRL_I128, nullptr);
949     setLibcallName(RTLIB::SRA_I128, nullptr);
950   }
951 
952   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
953 
954   // We have target-specific dag combine patterns for the following nodes:
955   setTargetDAGCombine(ISD::SHL);
956   setTargetDAGCombine(ISD::SRA);
957   setTargetDAGCombine(ISD::SRL);
958   setTargetDAGCombine(ISD::SINT_TO_FP);
959   setTargetDAGCombine(ISD::BUILD_VECTOR);
960   if (Subtarget.hasFPCVT())
961     setTargetDAGCombine(ISD::UINT_TO_FP);
962   setTargetDAGCombine(ISD::LOAD);
963   setTargetDAGCombine(ISD::STORE);
964   setTargetDAGCombine(ISD::BR_CC);
965   if (Subtarget.useCRBits())
966     setTargetDAGCombine(ISD::BRCOND);
967   setTargetDAGCombine(ISD::BSWAP);
968   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
969   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
970   setTargetDAGCombine(ISD::INTRINSIC_VOID);
971 
972   setTargetDAGCombine(ISD::SIGN_EXTEND);
973   setTargetDAGCombine(ISD::ZERO_EXTEND);
974   setTargetDAGCombine(ISD::ANY_EXTEND);
975 
976   if (Subtarget.useCRBits()) {
977     setTargetDAGCombine(ISD::TRUNCATE);
978     setTargetDAGCombine(ISD::SETCC);
979     setTargetDAGCombine(ISD::SELECT_CC);
980   }
981 
982   // Use reciprocal estimates.
983   if (TM.Options.UnsafeFPMath) {
984     setTargetDAGCombine(ISD::FDIV);
985     setTargetDAGCombine(ISD::FSQRT);
986   }
987 
988   // Darwin long double math library functions have $LDBL128 appended.
989   if (Subtarget.isDarwin()) {
990     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
991     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
992     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
993     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
994     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
995     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
996     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
997     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
998     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
999     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1000   }
1001 
1002   // With 32 condition bits, we don't need to sink (and duplicate) compares
1003   // aggressively in CodeGenPrep.
1004   if (Subtarget.useCRBits()) {
1005     setHasMultipleConditionRegisters();
1006     setJumpIsExpensive();
1007   }
1008 
1009   setMinFunctionAlignment(2);
1010   if (Subtarget.isDarwin())
1011     setPrefFunctionAlignment(4);
1012 
1013   switch (Subtarget.getDarwinDirective()) {
1014   default: break;
1015   case PPC::DIR_970:
1016   case PPC::DIR_A2:
1017   case PPC::DIR_E500mc:
1018   case PPC::DIR_E5500:
1019   case PPC::DIR_PWR4:
1020   case PPC::DIR_PWR5:
1021   case PPC::DIR_PWR5X:
1022   case PPC::DIR_PWR6:
1023   case PPC::DIR_PWR6X:
1024   case PPC::DIR_PWR7:
1025   case PPC::DIR_PWR8:
1026   case PPC::DIR_PWR9:
1027     setPrefFunctionAlignment(4);
1028     setPrefLoopAlignment(4);
1029     break;
1030   }
1031 
1032   if (Subtarget.enableMachineScheduler())
1033     setSchedulingPreference(Sched::Source);
1034   else
1035     setSchedulingPreference(Sched::Hybrid);
1036 
1037   computeRegisterProperties(STI.getRegisterInfo());
1038 
1039   // The Freescale cores do better with aggressive inlining of memcpy and
1040   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1041   if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1042       Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1043     MaxStoresPerMemset = 32;
1044     MaxStoresPerMemsetOptSize = 16;
1045     MaxStoresPerMemcpy = 32;
1046     MaxStoresPerMemcpyOptSize = 8;
1047     MaxStoresPerMemmove = 32;
1048     MaxStoresPerMemmoveOptSize = 8;
1049   } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
1050     // The A2 also benefits from (very) aggressive inlining of memcpy and
1051     // friends. The overhead of a the function call, even when warm, can be
1052     // over one hundred cycles.
1053     MaxStoresPerMemset = 128;
1054     MaxStoresPerMemcpy = 128;
1055     MaxStoresPerMemmove = 128;
1056     MaxLoadsPerMemcmp = 128;
1057   } else {
1058     MaxLoadsPerMemcmp = 8;
1059     MaxLoadsPerMemcmpOptSize = 4;
1060   }
1061 }
1062 
1063 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1064 /// the desired ByVal argument alignment.
1065 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1066                              unsigned MaxMaxAlign) {
1067   if (MaxAlign == MaxMaxAlign)
1068     return;
1069   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1070     if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1071       MaxAlign = 32;
1072     else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1073       MaxAlign = 16;
1074   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1075     unsigned EltAlign = 0;
1076     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1077     if (EltAlign > MaxAlign)
1078       MaxAlign = EltAlign;
1079   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1080     for (auto *EltTy : STy->elements()) {
1081       unsigned EltAlign = 0;
1082       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1083       if (EltAlign > MaxAlign)
1084         MaxAlign = EltAlign;
1085       if (MaxAlign == MaxMaxAlign)
1086         break;
1087     }
1088   }
1089 }
1090 
1091 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1092 /// function arguments in the caller parameter area.
1093 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1094                                                   const DataLayout &DL) const {
1095   // Darwin passes everything on 4 byte boundary.
1096   if (Subtarget.isDarwin())
1097     return 4;
1098 
1099   // 16byte and wider vectors are passed on 16byte boundary.
1100   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1101   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1102   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1103     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1104   return Align;
1105 }
1106 
// Report whether this function should be compiled in soft-float mode; the
// decision is delegated entirely to the subtarget's flag.
bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}
1110 
/// Return a printable name for the given PPC-specific DAG node opcode, or
/// nullptr when the opcode has no entry here (e.g. a target-independent
/// opcode), which is the convention callers of this hook expect.
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  // The cast gives the switch an enumerated type so the compiler can warn
  // when a newly added PPCISD enumerator is missing from this table.
  switch ((PPCISD::NodeType)Opcode) {
  // FIRST_NUMBER is only a range marker, not a real node.
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
  case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
  case PPCISD::STXSIX:          return "PPCISD::STXSIX";
  case PPCISD::VEXTS:           return "PPCISD::VEXTS";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  return nullptr;
}
1212 
1213 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1214                                           EVT VT) const {
1215   if (!VT.isVector())
1216     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1217 
1218   if (Subtarget.hasQPX())
1219     return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1220 
1221   return VT.changeVectorElementTypeToInteger();
1222 }
1223 
// Unconditionally allow aggressive FMA formation; the assert documents that
// this hook is only meaningful for floating-point types.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
1228 
1229 //===----------------------------------------------------------------------===//
1230 // Node matching predicates, for use by the tblgen matching code.
1231 //===----------------------------------------------------------------------===//
1232 
1233 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1234 static bool isFloatingPointZero(SDValue Op) {
1235   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1236     return CFP->getValueAPF().isZero();
1237   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1238     // Maybe this has already been legalized into the constant pool?
1239     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1240       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1241         return CFP->getValueAPF().isZero();
1242   }
1243   return false;
1244 }
1245 
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef shuffle-mask elements are encoded as negative values and match
  // anything.
  if (Op < 0)
    return true;
  return Op == Val;
}
1251 
1252 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1253 /// VPKUHUM instruction.
1254 /// The ShuffleKind distinguishes between big-endian operations with
1255 /// two different inputs (0), either-endian operations with two identical
1256 /// inputs (1), and little-endian operations with two different inputs (2).
1257 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1258 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1259                                SelectionDAG &DAG) {
1260   bool IsLE = DAG.getDataLayout().isLittleEndian();
1261   if (ShuffleKind == 0) {
1262     if (IsLE)
1263       return false;
1264     for (unsigned i = 0; i != 16; ++i)
1265       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1266         return false;
1267   } else if (ShuffleKind == 2) {
1268     if (!IsLE)
1269       return false;
1270     for (unsigned i = 0; i != 16; ++i)
1271       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1272         return false;
1273   } else if (ShuffleKind == 1) {
1274     unsigned j = IsLE ? 0 : 1;
1275     for (unsigned i = 0; i != 8; ++i)
1276       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1277           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1278         return false;
1279   }
1280   return true;
1281 }
1282 
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Big endian, two distinct inputs: each result halfword takes the low
    // (odd-addressed) halfword of a source word, i.e. bytes i*2+2, i*2+3.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    // Little endian, two distinct (swapped) inputs: the even-addressed
    // halfword of each source word, i.e. bytes i*2, i*2+1.
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary (both inputs identical): check both result halves against the
    // same source halfwords; j selects the endian-appropriate byte offset.
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
        return false;
  }
  // Any other ShuffleKind falls through and is treated as a match.
  return true;
}
1317 
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
    static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  // NOTE(review): VPKUDUM is a POWER8 vector instruction; this guards on
  // hasP8Vector(). Confirm whether hasP8Altivec() is the intended (less
  // restrictive) predicate for this instruction's availability.
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Big endian, two distinct inputs: the low word of each source
    // doubleword, i.e. bytes i*2+4 .. i*2+7.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    // Little endian, two distinct (swapped) inputs: the even-addressed
    // word of each source doubleword, i.e. bytes i*2 .. i*2+3.
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary (both inputs identical): both result halves must select the
    // same source words; j is the endian-appropriate byte offset.
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  // Any other ShuffleKind falls through and is treated as a match.
  return true;
}
1367 
/// isVMerge - Common function, used to match vmrg* shuffles.
///
/// Checks that the mask alternates UnitSize-byte units starting at byte
/// offsets LHSStart and RHSStart of the concatenated 32-byte shuffle input
/// (elements 0-15 come from the first operand, 16-31 from the second).
/// Undef mask elements (-1) match anything.
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  // vmrg* is only defined on the 16-byte vector type.
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  // Each iteration verifies one unit taken from the LHS side followed by
  // one unit taken from the RHS side.
  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
1387 
1388 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1389 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1390 /// The ShuffleKind distinguishes between big-endian merges with two
1391 /// different inputs (0), either-endian merges with two identical inputs (1),
1392 /// and little-endian merges with two different inputs (2).  For the latter,
1393 /// the input operands are swapped (see PPCInstrAltivec.td).
1394 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1395                              unsigned ShuffleKind, SelectionDAG &DAG) {
1396   if (DAG.getDataLayout().isLittleEndian()) {
1397     if (ShuffleKind == 1) // unary
1398       return isVMerge(N, UnitSize, 0, 0);
1399     else if (ShuffleKind == 2) // swapped
1400       return isVMerge(N, UnitSize, 0, 16);
1401     else
1402       return false;
1403   } else {
1404     if (ShuffleKind == 1) // unary
1405       return isVMerge(N, UnitSize, 8, 8);
1406     else if (ShuffleKind == 0) // normal
1407       return isVMerge(N, UnitSize, 8, 24);
1408     else
1409       return false;
1410   }
1411 }
1412 
1413 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1414 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1415 /// The ShuffleKind distinguishes between big-endian merges with two
1416 /// different inputs (0), either-endian merges with two identical inputs (1),
1417 /// and little-endian merges with two different inputs (2).  For the latter,
1418 /// the input operands are swapped (see PPCInstrAltivec.td).
1419 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1420                              unsigned ShuffleKind, SelectionDAG &DAG) {
1421   if (DAG.getDataLayout().isLittleEndian()) {
1422     if (ShuffleKind == 1) // unary
1423       return isVMerge(N, UnitSize, 8, 8);
1424     else if (ShuffleKind == 2) // swapped
1425       return isVMerge(N, UnitSize, 8, 24);
1426     else
1427       return false;
1428   } else {
1429     if (ShuffleKind == 1) // unary
1430       return isVMerge(N, UnitSize, 0, 0);
1431     else if (ShuffleKind == 0) // normal
1432       return isVMerge(N, UnitSize, 0, 16);
1433     else
1434       return false;
1435   }
1436 }
1437 
1438 /**
1439  * \brief Common function used to match vmrgew and vmrgow shuffles
1440  *
1441  * The indexOffset determines whether to look for even or odd words in
1442  * the shuffle mask. This is based on the of the endianness of the target
1443  * machine.
1444  *   - Little Endian:
1445  *     - Use offset of 0 to check for odd elements
1446  *     - Use offset of 4 to check for even elements
1447  *   - Big Endian:
1448  *     - Use offset of 0 to check for even elements
1449  *     - Use offset of 4 to check for odd elements
1450  * A detailed description of the vector element ordering for little endian and
1451  * big endian can be found at
1452  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1453  * Targeting your applications - what little endian and big endian IBM XL C/C++
1454  * compiler differences mean to you
1455  *
1456  * The mask to the shuffle vector instruction specifies the indices of the
1457  * elements from the two input vectors to place in the result. The elements are
1458  * numbered in array-access order, starting with the first vector. These vectors
1459  * are always of type v16i8, thus each vector will contain 16 elements of size
1460  * 8. More info on the shuffle vector can be found in the
1461  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1462  * Language Reference.
1463  *
1464  * The RHSStartValue indicates whether the same input vectors are used (unary)
1465  * or two different input vectors are used, based on the following:
1466  *   - If the instruction uses the same vector for both inputs, the range of the
1467  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1468  *     be 0.
1469  *   - If the instruction has two different vectors then the range of the
1470  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1471  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1472  *     to 31 specify elements in the second vector).
1473  *
1474  * \param[in] N The shuffle vector SD Node to analyze
1475  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1476  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1477  * vector to the shuffle_vector instruction
1478  * \return true iff this shuffle vector represents an even or odd word merge
1479  */
1480 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1481                      unsigned RHSStartValue) {
1482   if (N->getValueType(0) != MVT::v16i8)
1483     return false;
1484 
1485   for (unsigned i = 0; i < 2; ++i)
1486     for (unsigned j = 0; j < 4; ++j)
1487       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1488                              i*RHSStartValue+j+IndexOffset) ||
1489           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1490                              i*RHSStartValue+j+IndexOffset+8))
1491         return false;
1492   return true;
1493 }
1494 
1495 /**
1496  * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
1497  * vmrgow instructions.
1498  *
1499  * \param[in] N The shuffle vector SD Node to analyze
1500  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1501  * \param[in] ShuffleKind Identify the type of merge:
1502  *   - 0 = big-endian merge with two different inputs;
1503  *   - 1 = either-endian merge with two identical inputs;
1504  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1505  *     little-endian merges).
1506  * \param[in] DAG The current SelectionDAG
1507  * \return true iff this shuffle mask
1508  */
1509 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1510                               unsigned ShuffleKind, SelectionDAG &DAG) {
1511   if (DAG.getDataLayout().isLittleEndian()) {
1512     unsigned indexOffset = CheckEven ? 4 : 0;
1513     if (ShuffleKind == 1) // Unary
1514       return isVMerge(N, indexOffset, 0);
1515     else if (ShuffleKind == 2) // swapped
1516       return isVMerge(N, indexOffset, 16);
1517     else
1518       return false;
1519   }
1520   else {
1521     unsigned indexOffset = CheckEven ? 0 : 4;
1522     if (ShuffleKind == 1) // Unary
1523       return isVMerge(N, indexOffset, 0);
1524     else if (ShuffleKind == 0) // Normal
1525       return isVMerge(N, indexOffset, 16);
1526     else
1527       return false;
1528   }
1529   return false;
1530 }
1531 
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2).  For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  // The candidate shift is the first defined index minus its position.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Two-input case: check the rest of the elements to see if they are
    // consecutive (indices may run past 15 into the second input).
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Unary case: check the rest of the elements to see if they are
    // consecutive, wrapping around within the single 16-byte input
    // (hence the & 15).
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // For little endian the operands are swapped (see the header comment), so
  // the equivalent vsldoi amount is the 16's complement of the BE amount.
  // NOTE(review): a ShiftAmt of 0 yields 16 here, which is outside the 0-15
  // immediate range — presumably callers never reach this with a zero shift;
  // confirm against the uses of this function.
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
1578 
1579 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1580 /// specifies a splat of a single element that is suitable for input to
1581 /// VSPLTB/VSPLTH/VSPLTW.
1582 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1583   assert(N->getValueType(0) == MVT::v16i8 &&
1584          (EltSize == 1 || EltSize == 2 || EltSize == 4));
1585 
1586   // The consecutive indices need to specify an element, not part of two
1587   // different elements.  So abandon ship early if this isn't the case.
1588   if (N->getMaskElt(0) % EltSize != 0)
1589     return false;
1590 
1591   // This is a splat operation if each element of the permute is the same, and
1592   // if the value doesn't reference the second vector.
1593   unsigned ElementBase = N->getMaskElt(0);
1594 
1595   // FIXME: Handle UNDEF elements too!
1596   if (ElementBase >= 16)
1597     return false;
1598 
1599   // Check that the indices are consecutive, in the case of a multi-byte element
1600   // splatted with a v16i8 mask.
1601   for (unsigned i = 1; i != EltSize; ++i)
1602     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1603       return false;
1604 
1605   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1606     if (N->getMaskElt(i) < 0) continue;
1607     for (unsigned j = 0; j != EltSize; ++j)
1608       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1609         return false;
1610   }
1611   return true;
1612 }
1613 
1614 /// Check that the mask is shuffling N byte elements. Within each N byte
1615 /// element of the mask, the indices could be either in increasing or
1616 /// decreasing order as long as they are consecutive.
1617 /// \param[in] N the shuffle vector SD Node to analyze
1618 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1619 /// Word/DoubleWord/QuadWord).
1620 /// \param[in] StepLen the delta indices number among the N byte element, if
1621 /// the mask is in increasing/decreasing order then it is 1/-1.
1622 /// \return true iff the mask is shuffling N byte elements.
1623 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1624                                    int StepLen) {
1625   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1626          "Unexpected element width.");
1627   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1628 
1629   unsigned NumOfElem = 16 / Width;
1630   unsigned MaskVal[16]; //  Width is never greater than 16
1631   for (unsigned i = 0; i < NumOfElem; ++i) {
1632     MaskVal[0] = N->getMaskElt(i * Width);
1633     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1634       return false;
1635     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1636       return false;
1637     }
1638 
1639     for (unsigned int j = 1; j < Width; ++j) {
1640       MaskVal[j] = N->getMaskElt(i * Width + j);
1641       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1642         return false;
1643       }
1644     }
1645   }
1646 
1647   return true;
1648 }
1649 
/// Return true if this shuffle can be implemented as a word insertion: three
/// words of the result come, in order, from one input, and the remaining word
/// comes from the other input (or, in the unary case, from a fixed word of
/// the single input).  On success the outputs are set as follows:
///   \p ShiftElts    word rotation to apply to the source of the inserted word
///   \p InsertAtByte byte offset in the result at which the word is inserted
///   \p Swap         whether the two shuffle inputs must be swapped
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // Each word's bytes must be consecutive and increasing.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  // Rotation needed to bring the inserted word into position, indexed by the
  // word number (mod 4) of the element being inserted.
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
1724 
/// Return true if this VECTOR_SHUFFLE can be handled as a shift-left-double
/// by words (XXSLDWI-style): the result words are a consecutive run drawn
/// from the concatenation of the two inputs.  On success, \p ShiftElts is
/// set to the word shift amount and \p Swap indicates whether the inputs
/// must be swapped.
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    // Word indices must rotate within the single 4-word input.
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  // Two inputs give 8 words total, so indices rotate mod 8.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
1786 
1787 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1788   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1789 
1790   if (!isNByteElemShuffleMask(N, Width, -1))
1791     return false;
1792 
1793   for (int i = 0; i < 16; i += Width)
1794     if (N->getMaskElt(i) != i + Width - 1)
1795       return false;
1796 
1797   return true;
1798 }
1799 
/// Return true if this shuffle reverses the bytes within each halfword
/// (2-byte element) — the pattern matched by XXBRH.
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}
1803 
/// Return true if this shuffle reverses the bytes within each word
/// (4-byte element) — the pattern matched by XXBRW.
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}
1807 
/// Return true if this shuffle reverses the bytes within each doubleword
/// (8-byte element) — the pattern matched by XXBRD.
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}
1811 
/// Return true if this shuffle reverses the bytes of the whole quadword
/// (16-byte element) — the pattern matched by XXBRQ.
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}
1815 
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  // M0 and M1 are the doubleword numbers (0-3 across the two inputs) selected
  // for the two halves of the result.
  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      // The LE immediate complements each selector and swaps their order
      // relative to the BE encoding (compare the two arms of the ternary).
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      // After swapping the inputs, doubleword selectors 0..1 and 2..3
      // trade places.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      // After swapping the inputs, doubleword selectors 0..1 and 2..3
      // trade places.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
1875 
1876 
1877 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1878 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1879 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1880                                 SelectionDAG &DAG) {
1881   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1882   assert(isSplatShuffleMask(SVOp, EltSize));
1883   if (DAG.getDataLayout().isLittleEndian())
1884     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1885   else
1886     return SVOp->getMaskElt(0) / EltSize;
1887 }
1888 
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    // UniquedVals[k] holds the value every element at position k (mod
    // Multiple) must share.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      // Small positive values zero-extend into the wider splat element.
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      // Small negative values sign-extend into the wider splat element.
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  // Extract the raw bit pattern of the (int or fp) splat value.
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
1992 
1993 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1994 /// amount, otherwise return -1.
1995 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
1996   EVT VT = N->getValueType(0);
1997   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
1998     return -1;
1999 
2000   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2001 
2002   // Find the first non-undef value in the shuffle mask.
2003   unsigned i;
2004   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2005     /*search*/;
2006 
2007   if (i == 4) return -1;  // all undef.
2008 
2009   // Otherwise, check to see if the rest of the elements are consecutively
2010   // numbered from this value.
2011   unsigned ShiftAmt = SVOp->getMaskElt(i);
2012   if (ShiftAmt < i) return -1;
2013   ShiftAmt -= i;
2014 
2015   // Check the rest of the elements to see if they are consecutive.
2016   for (++i; i != 4; ++i)
2017     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2018       return -1;
2019 
2020   return ShiftAmt;
2021 }
2022 
2023 //===----------------------------------------------------------------------===//
2024 //  Addressing Mode Selection
2025 //===----------------------------------------------------------------------===//
2026 
2027 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2028 /// or 64-bit immediate, and if the value can be accurately represented as a
2029 /// sign extension from a 16-bit value.  If so, this returns true and the
2030 /// immediate.
2031 static bool isIntS16Immediate(SDNode *N, short &Imm) {
2032   if (!isa<ConstantSDNode>(N))
2033     return false;
2034 
2035   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
2036   if (N->getValueType(0) == MVT::i32)
2037     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2038   else
2039     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2040 }
/// Convenience overload of isIntS16Immediate that unwraps an SDValue to its
/// defining node.
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
2044 
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // A constant that fits in a signed 16-bit field is better handled as
    // [r+imm], so reject the [r+r] form here.
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    // Likewise for the low part of a PPCISD::Lo address pair.
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown, RHSKnown;
    DAG.computeKnownBits(N.getOperand(0), LHSKnown);

    // Only bother analyzing the RHS when the LHS has at least one bit known
    // to be zero.
    if (LHSKnown.Zero.getBoolValue()) {
      DAG.computeKnownBits(N.getOperand(1), RHSKnown);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.  (Every bit position is known zero on at least one side, so
      // OR and ADD compute the same value here.)
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
2085 
2086 // If we happen to be doing an i64 load or store into a stack slot that has
2087 // less than a 4-byte alignment, then the frame-index elimination may need to
2088 // use an indexed load or store instruction (because the offset may not be a
2089 // multiple of 4). The extra register needed to hold the offset comes from the
2090 // register scavenger, and it is possible that the scavenger will need to use
2091 // an emergency spill slot. As a result, we need to make sure that a spill slot
2092 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2093 // stack slot.
2094 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2095   // FIXME: This does not handle the LWA case.
2096   if (VT != MVT::i64)
2097     return;
2098 
2099   // NOTE: We'll exclude negative FIs here, which come from argument
2100   // lowering, because there are no known test cases triggering this problem
2101   // using packed structures (or similar). We can remove this exclusion if
2102   // we find such a test case. The reason why this is so test-case driven is
2103   // because this entire 'fixup' is only to prevent crashes (from the
2104   // register scavenger) on not-really-valid inputs. For example, if we have:
2105   //   %a = alloca i1
2106   //   %b = bitcast i1* %a to i64*
2107   //   store i64* a, i64 b
2108   // then the store should really be marked as 'align 1', but is not. If it
2109   // were marked as 'align 1' then the indexed form would have been
2110   // instruction-selected initially, and the problem this 'fixup' is preventing
2111   // won't happen regardless.
2112   if (FrameIdx < 0)
2113     return;
2114 
2115   MachineFunction &MF = DAG.getMachineFunction();
2116   MachineFrameInfo &MFI = MF.getFrameInfo();
2117 
2118   unsigned Align = MFI.getObjectAlignment(FrameIdx);
2119   if (Align >= 4)
2120     return;
2121 
2122   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2123   FuncInfo->setHasNonRISpills();
2124 }
2125 
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
/// On success, Disp and Base receive the operands to use for the [r+imm]
/// form.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    // ADD of a signed-16-bit constant (satisfying the alignment requirement,
    // if any) becomes [r+imm] directly.
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        // An i64 access to an under-aligned stack slot may need an emergency
        // spill slot for the register scavenger; make sure one exists.
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown;
      DAG.computeKnownBits(N.getOperand(0), LHSKnown);

      // Every bit set in imm must be known zero in the LHS, so OR == ADD.
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0" (displacement off the architectural zero register).
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      // High half = Addr minus the sign-extended low 16 bits, shifted down;
      // LIS shifts it back up.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: use N itself (or its frame index) as the base with a zero
  // displacement.
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}
2226 
2227 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2228 /// represented as an indexed [r+r] operation.
2229 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2230                                                 SDValue &Index,
2231                                                 SelectionDAG &DAG) const {
2232   // Check to see if we can easily represent this as an [r+r] address.  This
2233   // will fail if it thinks that the address is more profitably represented as
2234   // reg+imm, e.g. where imm = 0.
2235   if (SelectAddressRegReg(N, Base, Index, DAG))
2236     return true;
2237 
2238   // If the operand is an addition, always emit this as [r+r], since this is
2239   // better (for code size, and execution, as the memop does the add for free)
2240   // than emitting an explicit add.
2241   if (N.getOpcode() == ISD::ADD) {
2242     Base = N.getOperand(0);
2243     Index = N.getOperand(1);
2244     return true;
2245   }
2246 
2247   // Otherwise, do it the hard way, using R0 as the base register.
2248   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2249                          N.getValueType());
2250   Index = N;
2251   return true;
2252 }
2253 
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  // Only loads and stores can be pre-incremented; pull the address, memory
  // type, and alignment out of whichever one this is.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors (except
  // for QPX, which does have preinc r+r forms).
  if (VT.isVector()) {
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
      return false;
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
      // NOTE(review): Offset receives the callee's "Base" output and Base
      // its "Index" output here (arguments swapped relative to the callee's
      // parameter names) -- presumably deliberate for the QPX r+r pre-inc
      // form; confirm against the pre-inc expansion.
      AM = ISD::PRE_INC;
      return true;
    }
  }

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    // Non-i64: any signed 16-bit displacement is acceptable.
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    // i64: require a displacement that is a multiple of 4 (Aligned = true).
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
2337 
2338 //===----------------------------------------------------------------------===//
2339 //  LowerOperation implementation
2340 //===----------------------------------------------------------------------===//
2341 
2342 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2343 /// and LoOpFlags to the target MO flags.
2344 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2345                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2346                                const GlobalValue *GV = nullptr) {
2347   HiOpFlags = PPCII::MO_HA;
2348   LoOpFlags = PPCII::MO_LO;
2349 
2350   // Don't use the pic base if not in PIC relocation model.
2351   if (IsPIC) {
2352     HiOpFlags |= PPCII::MO_PIC_FLAG;
2353     LoOpFlags |= PPCII::MO_PIC_FLAG;
2354   }
2355 
2356   // If this is a reference to a global value that requires a non-lazy-ptr, make
2357   // sure that instruction lowering adds it.
2358   if (GV && Subtarget.hasLazyResolverStub(GV)) {
2359     HiOpFlags |= PPCII::MO_NLP_FLAG;
2360     LoOpFlags |= PPCII::MO_NLP_FLAG;
2361 
2362     if (GV->hasHiddenVisibility()) {
2363       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2364       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2365     }
2366   }
2367 }
2368 
2369 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2370                              SelectionDAG &DAG) {
2371   SDLoc DL(HiPart);
2372   EVT PtrVT = HiPart.getValueType();
2373   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2374 
2375   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2376   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2377 
2378   // With PIC, the first instruction is actually "GR+hi(&G)".
2379   if (isPIC)
2380     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2381                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2382 
2383   // Generate non-pic code that has direct accesses to the constant pool.
2384   // The address of the global is just (hi(&g)+lo(&g)).
2385   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2386 }
2387 
2388 static void setUsesTOCBasePtr(MachineFunction &MF) {
2389   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2390   FuncInfo->setUsesTOCBasePtr();
2391 }
2392 
2393 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2394   setUsesTOCBasePtr(DAG.getMachineFunction());
2395 }
2396 
2397 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2398                            SDValue GA) {
2399   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2400   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2401                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2402 
2403   SDValue Ops[] = { GA, Reg };
2404   return DAG.getMemIntrinsicNode(
2405       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2406       MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
2407       false, 0);
2408 }
2409 
2410 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2411                                              SelectionDAG &DAG) const {
2412   EVT PtrVT = Op.getValueType();
2413   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2414   const Constant *C = CP->getConstVal();
2415 
2416   // 64-bit SVR4 ABI code is always position-independent.
2417   // The actual address of the GlobalValue is stored in the TOC.
2418   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2419     setUsesTOCBasePtr(DAG);
2420     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2421     return getTOCEntry(DAG, SDLoc(CP), true, GA);
2422   }
2423 
2424   unsigned MOHiFlag, MOLoFlag;
2425   bool IsPIC = isPositionIndependent();
2426   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2427 
2428   if (IsPIC && Subtarget.isSVR4ABI()) {
2429     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2430                                            PPCII::MO_PIC_FLAG);
2431     return getTOCEntry(DAG, SDLoc(CP), false, GA);
2432   }
2433 
2434   SDValue CPIHi =
2435     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2436   SDValue CPILo =
2437     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2438   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2439 }
2440 
2441 // For 64-bit PowerPC, prefer the more compact relative encodings.
2442 // This trades 32 bits per jump table entry for one or two instructions
2443 // on the jump site.
2444 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2445   if (isJumpTableRelative())
2446     return MachineJumpTableInfo::EK_LabelDifference32;
2447 
2448   return TargetLowering::getJumpTableEncoding();
2449 }
2450 
2451 bool PPCTargetLowering::isJumpTableRelative() const {
2452   if (Subtarget.isPPC64())
2453     return true;
2454   return TargetLowering::isJumpTableRelative();
2455 }
2456 
2457 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2458                                                     SelectionDAG &DAG) const {
2459   if (!Subtarget.isPPC64())
2460     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2461 
2462   switch (getTargetMachine().getCodeModel()) {
2463   case CodeModel::Default:
2464   case CodeModel::Small:
2465   case CodeModel::Medium:
2466     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2467   default:
2468     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2469                        getPointerTy(DAG.getDataLayout()));
2470   }
2471 }
2472 
2473 const MCExpr *
2474 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2475                                                 unsigned JTI,
2476                                                 MCContext &Ctx) const {
2477   if (!Subtarget.isPPC64())
2478     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2479 
2480   switch (getTargetMachine().getCodeModel()) {
2481   case CodeModel::Default:
2482   case CodeModel::Small:
2483   case CodeModel::Medium:
2484     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2485   default:
2486     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2487   }
2488 }
2489 
2490 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2491   EVT PtrVT = Op.getValueType();
2492   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2493 
2494   // 64-bit SVR4 ABI code is always position-independent.
2495   // The actual address of the GlobalValue is stored in the TOC.
2496   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2497     setUsesTOCBasePtr(DAG);
2498     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2499     return getTOCEntry(DAG, SDLoc(JT), true, GA);
2500   }
2501 
2502   unsigned MOHiFlag, MOLoFlag;
2503   bool IsPIC = isPositionIndependent();
2504   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2505 
2506   if (IsPIC && Subtarget.isSVR4ABI()) {
2507     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2508                                         PPCII::MO_PIC_FLAG);
2509     return getTOCEntry(DAG, SDLoc(GA), false, GA);
2510   }
2511 
2512   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2513   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2514   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2515 }
2516 
2517 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2518                                              SelectionDAG &DAG) const {
2519   EVT PtrVT = Op.getValueType();
2520   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2521   const BlockAddress *BA = BASDN->getBlockAddress();
2522 
2523   // 64-bit SVR4 ABI code is always position-independent.
2524   // The actual BlockAddress is stored in the TOC.
2525   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2526     setUsesTOCBasePtr(DAG);
2527     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2528     return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2529   }
2530 
2531   unsigned MOHiFlag, MOLoFlag;
2532   bool IsPIC = isPositionIndependent();
2533   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2534   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2535   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2536   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2537 }
2538 
/// Lower a thread-local global address according to its TLS model.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // Emulated TLS is handled by the generic __emutls lowering instead.
  if (DAG.getTarget().Options.EmulatedTLS)
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    // Local-exec: add the HA/LO halves of the tp-relative offset to the
    // thread pointer (X13 on 64-bit, R2 on 32-bit).
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    // Initial-exec: load the tp-relative offset from the GOT
    // (LD_GOT_TPREL_L), then add the thread pointer via ADD_TLS.
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    // General-dynamic: compute the GOT slot address, then use the combined
    // ADDI_TLSGD_L_ADDR pseudo to produce the final address.
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                   GOTReg, TGA);
    } else {
      // 32-bit: the GOT pointer comes from the global base register
      // (small PIC) or the PPC32 PIC GOT pseudo.
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    // Local-dynamic: like general-dynamic (ADDI_TLSLD_L_ADDR), followed by
    // adding the HA/LO halves of the dtprel offset for this variable.
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
2628 
2629 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2630                                               SelectionDAG &DAG) const {
2631   EVT PtrVT = Op.getValueType();
2632   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2633   SDLoc DL(GSDN);
2634   const GlobalValue *GV = GSDN->getGlobal();
2635 
2636   // 64-bit SVR4 ABI code is always position-independent.
2637   // The actual address of the GlobalValue is stored in the TOC.
2638   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2639     setUsesTOCBasePtr(DAG);
2640     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2641     return getTOCEntry(DAG, DL, true, GA);
2642   }
2643 
2644   unsigned MOHiFlag, MOLoFlag;
2645   bool IsPIC = isPositionIndependent();
2646   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2647 
2648   if (IsPIC && Subtarget.isSVR4ABI()) {
2649     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2650                                             GSDN->getOffset(),
2651                                             PPCII::MO_PIC_FLAG);
2652     return getTOCEntry(DAG, DL, false, GA);
2653   }
2654 
2655   SDValue GAHi =
2656     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2657   SDValue GALo =
2658     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2659 
2660   SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2661 
2662   // If the global reference is actually to a non-lazy-pointer, we have to do an
2663   // extra load to get the address of the global.
2664   if (MOHiFlag & PPCII::MO_NLP_FLAG)
2665     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2666   return Ptr;
2667 }
2668 
/// Custom-lower ISD::SETCC.  v2i64 operand compares are handled specially
/// (VSX has no v2i64 comparison instructions); integer equality compares
/// against non-trivial constants are rewritten as (setcc (xor a, b), 0) so
/// the DAG combiner can exploit bit-twiddling opportunities.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      // Non-equality v2i64 compares are expanded by generic legalization.
      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  return SDValue();
}
2722 
/// Lower VAARG for 32-bit SVR4.  Judging by the offsets used below, the
/// va_list struct is laid out as: byte 0 = gpr index, byte 1 = fpr index,
/// bytes 4-7 = overflow (stack) area pointer, bytes 8-11 = register save
/// area pointer.  The result is the address of the argument, which is then
/// loaded.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index: zero-extended i8 load from byte 0 of the va_list.
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // i64 arguments go in an even/odd GPR pair, so the index must be even.
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  // Pointers to the register save area (offset 8) and overflow area
  // (offset 4) within the va_list struct.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8; CC is true when the relevant index
  // is still < 8 (argument lives in the register save area).
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  // Store the bumped index back as a single byte.
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  // Write the (possibly advanced) overflow pointer back to the va_list.
  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  // Finally load the argument from the selected address.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
2821 
2822 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2823   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2824 
2825   // We have to copy the entire va_list struct:
2826   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
2827   return DAG.getMemcpy(Op.getOperand(0), Op,
2828                        Op.getOperand(1), Op.getOperand(2),
2829                        DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2830                        false, MachinePointerInfo(), MachinePointerInfo());
2831 }
2832 
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // No adjustment of the trampoline address is needed on PPC; simply
  // forward operand 0 of the ADJUST_TRAMPOLINE node unchanged.
  return Op.getOperand(0);
}
2837 
2838 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2839                                                 SelectionDAG &DAG) const {
2840   SDValue Chain = Op.getOperand(0);
2841   SDValue Trmp = Op.getOperand(1); // trampoline
2842   SDValue FPtr = Op.getOperand(2); // nested function
2843   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2844   SDLoc dl(Op);
2845 
2846   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2847   bool isPPC64 = (PtrVT == MVT::i64);
2848   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2849 
2850   TargetLowering::ArgListTy Args;
2851   TargetLowering::ArgListEntry Entry;
2852 
2853   Entry.Ty = IntPtrTy;
2854   Entry.Node = Trmp; Args.push_back(Entry);
2855 
2856   // TrampSize == (isPPC64 ? 48 : 40);
2857   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2858                                isPPC64 ? MVT::i64 : MVT::i32);
2859   Args.push_back(Entry);
2860 
2861   Entry.Node = FPtr; Args.push_back(Entry);
2862   Entry.Node = Nest; Args.push_back(Entry);
2863 
2864   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2865   TargetLowering::CallLoweringInfo CLI(DAG);
2866   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2867       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2868       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
2869 
2870   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2871   return CallResult.second;
2872 }
2873 
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  // Initial gpr/fpr indices recorded while lowering the formal arguments.
  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Pointer step between the two pointer-sized fields (4 bytes on PPC32).
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  // Step from the fpr byte (offset 1) to overflow_arg_area (offset 4):
  // 4 - 1 = 3 bytes, which covers the 2 bytes of alignment padding.
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  // Step from the gpr byte (offset 0) to the fpr byte (offset 1).
  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  // The four stores are chained (each uses the previous store's chain), so
  // the final store's chain result covers the whole va_list initialization.
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
2957 
2958 #include "PPCGenCallingConv.inc"
2959 
2960 // Function whose sole purpose is to kill compiler warnings
2961 // stemming from unused functions included from PPCGenCallingConv.inc.
2962 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2963   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2964 }
2965 
// Custom CCAssignFn that reports every value as fully handled without
// assigning a location.  NOTE(review): presumably referenced from the
// tablegen'd calling-convention rules (PPCGenCallingConv.inc) — confirm
// at the use sites.
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  // Returning true tells the CC framework the argument needs no further work.
  return true;
}
2972 
2973 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2974                                              MVT &LocVT,
2975                                              CCValAssign::LocInfo &LocInfo,
2976                                              ISD::ArgFlagsTy &ArgFlags,
2977                                              CCState &State) {
2978   static const MCPhysReg ArgRegs[] = {
2979     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2980     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2981   };
2982   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2983 
2984   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2985 
2986   // Skip one register if the first unallocated register has an even register
2987   // number and there are still argument registers available which have not been
2988   // allocated yet. RegNum is actually an index into ArgRegs, which means we
2989   // need to skip a register if RegNum is odd.
2990   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2991     State.AllocateReg(ArgRegs[RegNum]);
2992   }
2993 
2994   // Always return false here, as this function only makes sure that the first
2995   // unallocated register has an odd register number and does not actually
2996   // allocate a register for the current argument.
2997   return false;
2998 }
2999 
3000 bool
3001 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
3002                                                   MVT &LocVT,
3003                                                   CCValAssign::LocInfo &LocInfo,
3004                                                   ISD::ArgFlagsTy &ArgFlags,
3005                                                   CCState &State) {
3006   static const MCPhysReg ArgRegs[] = {
3007     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3008     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3009   };
3010   const unsigned NumArgRegs = array_lengthof(ArgRegs);
3011 
3012   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3013   int RegsLeft = NumArgRegs - RegNum;
3014 
3015   // Skip if there is not enough registers left for long double type (4 gpr regs
3016   // in soft float mode) and put long double argument on the stack.
3017   if (RegNum != NumArgRegs && RegsLeft < 4) {
3018     for (int i = 0; i < RegsLeft; i++) {
3019       State.AllocateReg(ArgRegs[RegNum + i]);
3020     }
3021   }
3022 
3023   return false;
3024 }
3025 
3026 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
3027                                                MVT &LocVT,
3028                                                CCValAssign::LocInfo &LocInfo,
3029                                                ISD::ArgFlagsTy &ArgFlags,
3030                                                CCState &State) {
3031   static const MCPhysReg ArgRegs[] = {
3032     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3033     PPC::F8
3034   };
3035 
3036   const unsigned NumArgRegs = array_lengthof(ArgRegs);
3037 
3038   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3039 
3040   // If there is only one Floating-point register left we need to put both f64
3041   // values of a split ppc_fp128 value on the stack.
3042   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
3043     State.AllocateReg(ArgRegs[RegNum]);
3044   }
3045 
3046   // Always return false here, as this function only makes sure that the two f64
3047   // values a ppc_fp128 value is split into are both passed in registers or both
3048   // passed on the stack and does not actually allocate a register for the
3049   // current argument.
3050   return false;
3051 }
3052 
/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
/// NOTE(review): presumably shared with the 64-bit SVR4 lowering routines
/// further down in this file — confirm at the use sites.
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};

/// QFPR - The set of QPX registers that should be allocated for arguments.
static const MCPhysReg QFPR[] = {
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3063 
3064 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3065 /// the stack.
3066 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3067                                        unsigned PtrByteSize) {
3068   unsigned ArgSize = ArgVT.getStoreSize();
3069   if (Flags.isByVal())
3070     ArgSize = Flags.getByValSize();
3071 
3072   // Round up to multiples of the pointer size, except for array members,
3073   // which are always packed.
3074   if (!Flags.isInConsecutiveRegs())
3075     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3076 
3077   return ArgSize;
3078 }
3079 
3080 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3081 /// on the stack.
3082 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3083                                             ISD::ArgFlagsTy Flags,
3084                                             unsigned PtrByteSize) {
3085   unsigned Align = PtrByteSize;
3086 
3087   // Altivec parameters are padded to a 16 byte boundary.
3088   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3089       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3090       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3091       ArgVT == MVT::v1i128)
3092     Align = 16;
3093   // QPX vector types stored in double-precision are padded to a 32 byte
3094   // boundary.
3095   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3096     Align = 32;
3097 
3098   // ByVal parameters are aligned as requested.
3099   if (Flags.isByVal()) {
3100     unsigned BVAlign = Flags.getByValAlign();
3101     if (BVAlign > PtrByteSize) {
3102       if (BVAlign % PtrByteSize != 0)
3103           llvm_unreachable(
3104             "ByVal alignment is not a multiple of the pointer size");
3105 
3106       Align = BVAlign;
3107     }
3108   }
3109 
3110   // Array members are always packed to their original alignment.
3111   if (Flags.isInConsecutiveRegs()) {
3112     // If the array member was split into multiple registers, the first
3113     // needs to be aligned to the size of the full type.  (Except for
3114     // ppcf128, which is only aligned as its f64 components.)
3115     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3116       Align = OrigVT.getStoreSize();
3117     else
3118       Align = ArgVT.getStoreSize();
3119   }
3120 
3121   return Align;
3122 }
3123 
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
/// Note that ArgOffset is always advanced, even when the argument ends up
/// in a register — callers rely on it tracking the stack-slot position.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize,
                                   unsigned LinkageSize,
                                   unsigned ParamAreaSize,
                                   unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs, bool HasQPX) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  unsigned Align =
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.  (ByVal aggregates never go in
  // FPRs/VRs, so they keep the memory verdict computed above.)
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
        // QPX registers overlap with the scalar FP registers.
        (HasQPX && (ArgVT == MVT::v4f32 ||
                    ArgVT == MVT::v4f64 ||
                    ArgVT == MVT::v4i1)))
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
3180 
3181 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3182 /// ensure minimum alignment required for target.
3183 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3184                                      unsigned NumBytes) {
3185   unsigned TargetAlign = Lowering->getStackAlignment();
3186   unsigned AlignMask = TargetAlign - 1;
3187   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3188   return NumBytes;
3189 }
3190 
3191 SDValue PPCTargetLowering::LowerFormalArguments(
3192     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3193     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3194     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3195   if (Subtarget.isSVR4ABI()) {
3196     if (Subtarget.isPPC64())
3197       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3198                                          dl, DAG, InVals);
3199     else
3200       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3201                                          dl, DAG, InVals);
3202   } else {
3203     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3204                                        dl, DAG, InVals);
3205   }
3206 }
3207 
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  // Stack slots are word-sized on PPC32.
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  // Under soft-float, pre-scan the arguments so the custom ppc_fp128
  // handlers can see which values originated as long double.
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  // The ppc_fp128 tracking recorded above is only needed during analysis.
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      // Pick the register class that holds values of this type.
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f64:
          RC = &PPC::QFRCRegClass;
          break;
        case MVT::v4i1:
          RC = &PPC::QBRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual ones.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      // i1 values arrive widened in a GPR; copy them out as i32 and
      // truncate back down below.
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);

      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      unsigned ArgSize = VA.getLocVT().getStoreSize();
      int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                     isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    // Soft-float functions never pass varargs in FPRs.
    if (useSoftFloat())
       NumFPArgRegs = 0;

    // Record the first unused register of each kind for LowerVASTART.
    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Merge all of the register-save stores into the chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
3439 
3440 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3441 // value to MVT::i64 and then truncate to the correct register size.
3442 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3443                                              EVT ObjectVT, SelectionDAG &DAG,
3444                                              SDValue ArgVal,
3445                                              const SDLoc &dl) const {
3446   if (Flags.isSExt())
3447     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3448                          DAG.getValueType(ObjectVT));
3449   else if (Flags.isZExt())
3450     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3451                          DAG.getValueType(ObjectVT));
3452 
3453   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3454 }
3455 
3456 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3457     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3458     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3459     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3460   // TODO: add description of PPC stack frame format, or at least some docs.
3461   //
3462   bool isELFv2ABI = Subtarget.isELFv2ABI();
3463   bool isLittleEndian = Subtarget.isLittleEndian();
3464   MachineFunction &MF = DAG.getMachineFunction();
3465   MachineFrameInfo &MFI = MF.getFrameInfo();
3466   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3467 
3468   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3469          "fastcc not supported on varargs functions");
3470 
3471   EVT PtrVT = getPointerTy(MF.getDataLayout());
3472   // Potential tail calls could cause overwriting of argument stack slots.
3473   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3474                        (CallConv == CallingConv::Fast));
3475   unsigned PtrByteSize = 8;
3476   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3477 
3478   static const MCPhysReg GPR[] = {
3479     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3480     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3481   };
3482   static const MCPhysReg VR[] = {
3483     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3484     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3485   };
3486 
3487   const unsigned Num_GPR_Regs = array_lengthof(GPR);
3488   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3489   const unsigned Num_VR_Regs  = array_lengthof(VR);
3490   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3491 
3492   // Do a first pass over the arguments to determine whether the ABI
3493   // guarantees that our caller has allocated the parameter save area
3494   // on its stack frame.  In the ELFv1 ABI, this is always the case;
3495   // in the ELFv2 ABI, it is true if this is a vararg function or if
3496   // any parameter is located in a stack slot.
3497 
3498   bool HasParameterArea = !isELFv2ABI || isVarArg;
3499   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3500   unsigned NumBytes = LinkageSize;
3501   unsigned AvailableFPRs = Num_FPR_Regs;
3502   unsigned AvailableVRs = Num_VR_Regs;
3503   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3504     if (Ins[i].Flags.isNest())
3505       continue;
3506 
3507     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3508                                PtrByteSize, LinkageSize, ParamAreaSize,
3509                                NumBytes, AvailableFPRs, AvailableVRs,
3510                                Subtarget.hasQPX()))
3511       HasParameterArea = true;
3512   }
3513 
3514   // Add DAG nodes to load the arguments or copy them out of registers.  On
3515   // entry to a function on PPC, the arguments start after the linkage area,
3516   // although the first ones are often in registers.
3517 
3518   unsigned ArgOffset = LinkageSize;
3519   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3520   unsigned &QFPR_idx = FPR_idx;
3521   SmallVector<SDValue, 8> MemOps;
3522   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3523   unsigned CurArgIdx = 0;
3524   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3525     SDValue ArgVal;
3526     bool needsLoad = false;
3527     EVT ObjectVT = Ins[ArgNo].VT;
3528     EVT OrigVT = Ins[ArgNo].ArgVT;
3529     unsigned ObjSize = ObjectVT.getStoreSize();
3530     unsigned ArgSize = ObjSize;
3531     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3532     if (Ins[ArgNo].isOrigArg()) {
3533       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3534       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3535     }
3536     // We re-align the argument offset for each argument, except when using the
3537     // fast calling convention, when we need to make sure we do that only when
3538     // we'll actually use a stack slot.
3539     unsigned CurArgOffset, Align;
3540     auto ComputeArgOffset = [&]() {
3541       /* Respect alignment of argument on the stack.  */
3542       Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3543       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3544       CurArgOffset = ArgOffset;
3545     };
3546 
3547     if (CallConv != CallingConv::Fast) {
3548       ComputeArgOffset();
3549 
3550       /* Compute GPR index associated with argument offset.  */
3551       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3552       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3553     }
3554 
3555     // FIXME the codegen can be much improved in some cases.
3556     // We do not have to keep everything in memory.
3557     if (Flags.isByVal()) {
3558       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3559 
3560       if (CallConv == CallingConv::Fast)
3561         ComputeArgOffset();
3562 
3563       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3564       ObjSize = Flags.getByValSize();
3565       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3566       // Empty aggregate parameters do not take up registers.  Examples:
3567       //   struct { } a;
3568       //   union  { } b;
3569       //   int c[0];
3570       // etc.  However, we have to provide a place-holder in InVals, so
3571       // pretend we have an 8-byte item at the current address for that
3572       // purpose.
3573       if (!ObjSize) {
3574         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3575         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3576         InVals.push_back(FIN);
3577         continue;
3578       }
3579 
3580       // Create a stack object covering all stack doublewords occupied
3581       // by the argument.  If the argument is (fully or partially) on
3582       // the stack, or if the argument is fully in registers but the
3583       // caller has allocated the parameter save anyway, we can refer
3584       // directly to the caller's stack frame.  Otherwise, create a
3585       // local copy in our own frame.
3586       int FI;
3587       if (HasParameterArea ||
3588           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3589         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3590       else
3591         FI = MFI.CreateStackObject(ArgSize, Align, false);
3592       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3593 
3594       // Handle aggregates smaller than 8 bytes.
3595       if (ObjSize < PtrByteSize) {
3596         // The value of the object is its address, which differs from the
3597         // address of the enclosing doubleword on big-endian systems.
3598         SDValue Arg = FIN;
3599         if (!isLittleEndian) {
3600           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3601           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3602         }
3603         InVals.push_back(Arg);
3604 
3605         if (GPR_idx != Num_GPR_Regs) {
3606           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3607           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3608           SDValue Store;
3609 
3610           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3611             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3612                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
3613             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3614                                       MachinePointerInfo(&*FuncArg), ObjType);
3615           } else {
3616             // For sizes that don't fit a truncating store (3, 5, 6, 7),
3617             // store the whole register as-is to the parameter save area
3618             // slot.
3619             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3620                                  MachinePointerInfo(&*FuncArg));
3621           }
3622 
3623           MemOps.push_back(Store);
3624         }
3625         // Whether we copied from a register or not, advance the offset
3626         // into the parameter save area by a full doubleword.
3627         ArgOffset += PtrByteSize;
3628         continue;
3629       }
3630 
3631       // The value of the object is its address, which is the address of
3632       // its first stack doubleword.
3633       InVals.push_back(FIN);
3634 
3635       // Store whatever pieces of the object are in registers to memory.
3636       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3637         if (GPR_idx == Num_GPR_Regs)
3638           break;
3639 
3640         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3641         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3642         SDValue Addr = FIN;
3643         if (j) {
3644           SDValue Off = DAG.getConstant(j, dl, PtrVT);
3645           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3646         }
3647         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3648                                      MachinePointerInfo(&*FuncArg, j));
3649         MemOps.push_back(Store);
3650         ++GPR_idx;
3651       }
3652       ArgOffset += ArgSize;
3653       continue;
3654     }
3655 
3656     switch (ObjectVT.getSimpleVT().SimpleTy) {
3657     default: llvm_unreachable("Unhandled argument type!");
3658     case MVT::i1:
3659     case MVT::i32:
3660     case MVT::i64:
3661       if (Flags.isNest()) {
3662         // The 'nest' parameter, if any, is passed in R11.
3663         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3664         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3665 
3666         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3667           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3668 
3669         break;
3670       }
3671 
3672       // These can be scalar arguments or elements of an integer array type
3673       // passed directly.  Clang may use those instead of "byval" aggregate
3674       // types to avoid forcing arguments to memory unnecessarily.
3675       if (GPR_idx != Num_GPR_Regs) {
3676         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3677         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3678 
3679         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3680           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3681           // value to MVT::i64 and then truncate to the correct register size.
3682           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3683       } else {
3684         if (CallConv == CallingConv::Fast)
3685           ComputeArgOffset();
3686 
3687         needsLoad = true;
3688         ArgSize = PtrByteSize;
3689       }
3690       if (CallConv != CallingConv::Fast || needsLoad)
3691         ArgOffset += 8;
3692       break;
3693 
3694     case MVT::f32:
3695     case MVT::f64:
3696       // These can be scalar arguments or elements of a float array type
3697       // passed directly.  The latter are used to implement ELFv2 homogenous
3698       // float aggregates.
3699       if (FPR_idx != Num_FPR_Regs) {
3700         unsigned VReg;
3701 
3702         if (ObjectVT == MVT::f32)
3703           VReg = MF.addLiveIn(FPR[FPR_idx],
3704                               Subtarget.hasP8Vector()
3705                                   ? &PPC::VSSRCRegClass
3706                                   : &PPC::F4RCRegClass);
3707         else
3708           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3709                                                 ? &PPC::VSFRCRegClass
3710                                                 : &PPC::F8RCRegClass);
3711 
3712         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3713         ++FPR_idx;
3714       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3715         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3716         // once we support fp <-> gpr moves.
3717 
3718         // This can only ever happen in the presence of f32 array types,
3719         // since otherwise we never run out of FPRs before running out
3720         // of GPRs.
3721         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3722         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3723 
3724         if (ObjectVT == MVT::f32) {
3725           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3726             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3727                                  DAG.getConstant(32, dl, MVT::i32));
3728           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3729         }
3730 
3731         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3732       } else {
3733         if (CallConv == CallingConv::Fast)
3734           ComputeArgOffset();
3735 
3736         needsLoad = true;
3737       }
3738 
3739       // When passing an array of floats, the array occupies consecutive
3740       // space in the argument area; only round up to the next doubleword
3741       // at the end of the array.  Otherwise, each float takes 8 bytes.
3742       if (CallConv != CallingConv::Fast || needsLoad) {
3743         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3744         ArgOffset += ArgSize;
3745         if (Flags.isInConsecutiveRegsLast())
3746           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3747       }
3748       break;
3749     case MVT::v4f32:
3750     case MVT::v4i32:
3751     case MVT::v8i16:
3752     case MVT::v16i8:
3753     case MVT::v2f64:
3754     case MVT::v2i64:
3755     case MVT::v1i128:
3756       if (!Subtarget.hasQPX()) {
3757       // These can be scalar arguments or elements of a vector array type
3758       // passed directly.  The latter are used to implement ELFv2 homogenous
3759       // vector aggregates.
3760       if (VR_idx != Num_VR_Regs) {
3761         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3762         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3763         ++VR_idx;
3764       } else {
3765         if (CallConv == CallingConv::Fast)
3766           ComputeArgOffset();
3767 
3768         needsLoad = true;
3769       }
3770       if (CallConv != CallingConv::Fast || needsLoad)
3771         ArgOffset += 16;
3772       break;
3773       } // not QPX
3774 
3775       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3776              "Invalid QPX parameter type");
3777       /* fall through */
3778 
3779     case MVT::v4f64:
3780     case MVT::v4i1:
3781       // QPX vectors are treated like their scalar floating-point subregisters
3782       // (except that they're larger).
3783       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3784       if (QFPR_idx != Num_QFPR_Regs) {
3785         const TargetRegisterClass *RC;
3786         switch (ObjectVT.getSimpleVT().SimpleTy) {
3787         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3788         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3789         default:         RC = &PPC::QBRCRegClass; break;
3790         }
3791 
3792         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3793         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3794         ++QFPR_idx;
3795       } else {
3796         if (CallConv == CallingConv::Fast)
3797           ComputeArgOffset();
3798         needsLoad = true;
3799       }
3800       if (CallConv != CallingConv::Fast || needsLoad)
3801         ArgOffset += Sz;
3802       break;
3803     }
3804 
3805     // We need to load the argument to a virtual register if we determined
3806     // above that we ran out of physical registers of the appropriate type.
3807     if (needsLoad) {
3808       if (ObjSize < ArgSize && !isLittleEndian)
3809         CurArgOffset += ArgSize - ObjSize;
3810       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3811       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3812       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3813     }
3814 
3815     InVals.push_back(ArgVal);
3816   }
3817 
3818   // Area that is at least reserved in the caller of this function.
3819   unsigned MinReservedArea;
3820   if (HasParameterArea)
3821     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3822   else
3823     MinReservedArea = LinkageSize;
3824 
3825   // Set the size that is at least reserved in caller of this function.  Tail
3826   // call optimized functions' reserved stack space needs to be aligned so that
3827   // taking the difference between two stack areas will result in an aligned
3828   // stack.
3829   MinReservedArea =
3830       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3831   FuncInfo->setMinReservedArea(MinReservedArea);
3832 
3833   // If the function takes variable number of arguments, make a frame index for
3834   // the start of the first vararg value... for expansion of llvm.va_start.
3835   if (isVarArg) {
3836     int Depth = ArgOffset;
3837 
3838     FuncInfo->setVarArgsFrameIndex(
3839       MFI.CreateFixedObject(PtrByteSize, Depth, true));
3840     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3841 
3842     // If this function is vararg, store any remaining integer argument regs
3843     // to their spots on the stack so that they may be loaded by dereferencing
3844     // the result of va_next.
3845     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3846          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3847       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3848       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3849       SDValue Store =
3850           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3851       MemOps.push_back(Store);
3852       // Increment the address by four for the next argument to store
3853       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
3854       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3855     }
3856   }
3857 
3858   if (!MemOps.empty())
3859     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3860 
3861   return Chain;
3862 }
3863 
/// LowerFormalArguments_Darwin - Lower incoming (formal) arguments using the
/// Darwin PowerPC ABI (32- and 64-bit).  Arguments arriving in registers are
/// copied into virtual registers; arguments on the stack are materialized as
/// loads from fixed frame objects.  One SDValue per incoming argument is
/// appended to \p InVals, and any stores needed (byval copies, vararg
/// register spills) are chained into the returned token.
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  // Pointer size (4 or 8) drives both stack-slot granularity and GPR usage.
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  // Running offset into the parameter save area; starts past the linkage area.
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  // Soft-float targets pass nothing in FPRs.
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  // Indices of the next unallocated register of each class.
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  // Stores that must execute before the arguments are used (byval copies,
  // vararg GPR spills); merged into a TokenFactor at the end.
  SmallVector<SDValue, 8> MemOps;
  // Count of Altivec vector args deferred to the end of the parameter area
  // (32-bit non-varargs case only; see the v4f32..v16i8 handling below).
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      // Keep FuncArg in sync with the original IR argument this InputArg
      // piece came from (one IR arg may be split into several pieces).
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        // Small byval: store just the low 1 or 2 bytes of the incoming GPR
        // into the (right-justified) stack slot.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the rest of the object is already in memory;
          // just skip the offset past it.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        // A 64-bit FP arg on 32-bit targets consumes two GPR slots.
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          // Vector slot is 16-byte aligned; GPRs covering any padding and
          // the slot itself are consumed as well.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // (ArgSize - ObjSize) right-justifies the object within its slot.
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4223 
4224 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4225 /// adjusted to accommodate the arguments for the tailcall.
4226 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4227                                    unsigned ParamSize) {
4228 
4229   if (!isTailCall) return 0;
4230 
4231   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4232   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4233   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4234   // Remember only if the new adjustement is bigger.
4235   if (SPDiff < FI->getTailCallSPDelta())
4236     FI->setTailCallSPDelta(SPDiff);
4237 
4238   return SPDiff;
4239 }
4240 
4241 static bool isFunctionGlobalAddress(SDValue Callee);
4242 
4243 static bool
4244 resideInSameSection(const Function *Caller, SDValue Callee,
4245                     const TargetMachine &TM) {
4246   // If !G, Callee can be an external symbol.
4247   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4248   if (!G)
4249     return false;
4250 
4251   const GlobalValue *GV = G->getGlobal();
4252   if (!GV->isStrongDefinitionForLinker())
4253     return false;
4254 
4255   // Any explicitly-specified sections and section prefixes must also match.
4256   // Also, if we're using -ffunction-sections, then each function is always in
4257   // a different section (the same is true for COMDAT functions).
4258   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4259       GV->getSection() != Caller->getSection())
4260     return false;
4261   if (const auto *F = dyn_cast<Function>(GV)) {
4262     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4263       return false;
4264   }
4265 
4266   // If the callee might be interposed, then we can't assume the ultimate call
4267   // target will be in the same section. Even in cases where we can assume that
4268   // interposition won't happen, in any case where the linker might insert a
4269   // stub to allow for interposition, we must generate code as though
4270   // interposition might occur. To understand why this matters, consider a
4271   // situation where: a -> b -> c where the arrows indicate calls. b and c are
4272   // in the same section, but a is in a different module (i.e. has a different
4273   // TOC base pointer). If the linker allows for interposition between b and c,
4274   // then it will generate a stub for the call edge between b and c which will
4275   // save the TOC pointer into the designated stack slot allocated by b. If we
4276   // return true here, and therefore allow a tail call between b and c, that
4277   // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
4278   // pointer into the stack slot allocated by a (where the a -> b stub saved
4279   // a's TOC base pointer). If we're not considering a tail call, but rather,
4280   // whether a nop is needed after the call instruction in b, because the linker
4281   // will insert a stub, it might complain about a missing nop if we omit it
4282   // (although many don't complain in this case).
4283   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4284     return false;
4285 
4286   return true;
4287 }
4288 
4289 static bool
4290 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4291                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4292   assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
4293 
4294   const unsigned PtrByteSize = 8;
4295   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4296 
4297   static const MCPhysReg GPR[] = {
4298     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4299     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4300   };
4301   static const MCPhysReg VR[] = {
4302     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4303     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4304   };
4305 
4306   const unsigned NumGPRs = array_lengthof(GPR);
4307   const unsigned NumFPRs = 13;
4308   const unsigned NumVRs = array_lengthof(VR);
4309   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4310 
4311   unsigned NumBytes = LinkageSize;
4312   unsigned AvailableFPRs = NumFPRs;
4313   unsigned AvailableVRs = NumVRs;
4314 
4315   for (const ISD::OutputArg& Param : Outs) {
4316     if (Param.Flags.isNest()) continue;
4317 
4318     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4319                                PtrByteSize, LinkageSize, ParamAreaSize,
4320                                NumBytes, AvailableFPRs, AvailableVRs,
4321                                Subtarget.hasQPX()))
4322       return true;
4323   }
4324   return false;
4325 }
4326 
4327 static bool
4328 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
4329   if (CS->arg_size() != CallerFn->arg_size())
4330     return false;
4331 
4332   ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
4333   ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
4334   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4335 
4336   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4337     const Value* CalleeArg = *CalleeArgIter;
4338     const Value* CallerArg = &(*CallerArgIter);
4339     if (CalleeArg == CallerArg)
4340       continue;
4341 
4342     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4343     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4344     //      }
4345     // 1st argument of callee is undef and has the same type as caller.
4346     if (CalleeArg->getType() == CallerArg->getType() &&
4347         isa<UndefValue>(CalleeArg))
4348       continue;
4349 
4350     return false;
4351   }
4352 
4353   return true;
4354 }
4355 
/// IsEligibleForTailCallOptimization_64SVR4 - Decide whether this 64-bit SVR4
/// (ELFv1/ELFv2) call may be lowered as a tail call (TCO) or sibling call
/// (SCO). The checks proceed from cheapest to most involved; any failure
/// disqualifies the call.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite *CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // SCO can be disabled on the command line; guaranteed TCO still proceeds.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();

  // Tail or sibling call optimization (TCO/SCO) requires that callee and
  // caller use the same calling convention.
  if (CallerCC != CalleeCC) return false;

  // SCO supports only the C and Fast calling conventions.
  if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
    return false;

  // A caller with any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // A callee with any byval parameter is not supported either.
  // Note: This is a quick workaround; in some cases, e.g. when the
  // caller's stack size > callee's stack size, we could still apply
  // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // No TCO/SCO on indirect calls because the caller would have to restore
  // its TOC.
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Check that the callee resides in the same section, because for now the
  // PPC64 SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol
  // residing in another section.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If the callee uses the same argument list that the caller is using, then
  // we can apply SCO. Otherwise we must also check whether the callee needs
  // stack slots for passing arguments.
  if (!hasSameArgumentList(MF.getFunction(), CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}
4422 
4423 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4424 /// for tail call optimization. Targets which want to do tail call
4425 /// optimization should implement this function.
4426 bool
4427 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4428                                                      CallingConv::ID CalleeCC,
4429                                                      bool isVarArg,
4430                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4431                                                      SelectionDAG& DAG) const {
4432   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4433     return false;
4434 
4435   // Variable argument functions are not supported.
4436   if (isVarArg)
4437     return false;
4438 
4439   MachineFunction &MF = DAG.getMachineFunction();
4440   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4441   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4442     // Functions containing by val parameters are not supported.
4443     for (unsigned i = 0; i != Ins.size(); i++) {
4444        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4445        if (Flags.isByVal()) return false;
4446     }
4447 
4448     // Non-PIC/GOT tail calls are supported.
4449     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4450       return true;
4451 
4452     // At the moment we can only do local tail calls (in same module, hidden
4453     // or protected) if we are generating PIC.
4454     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4455       return G->getGlobal()->hasHiddenVisibility()
4456           || G->getGlobal()->hasProtectedVisibility();
4457   }
4458 
4459   return false;
4460 }
4461 
4462 /// isCallCompatibleAddress - Return the immediate to use if the specified
4463 /// 32-bit value is representable in the immediate field of a BxA instruction.
4464 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4465   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4466   if (!C) return nullptr;
4467 
4468   int Addr = C->getZExtValue();
4469   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4470       SignExtend32<26>(Addr) != Addr)
4471     return nullptr;  // Top 6 bits have to be sext of immediate.
4472 
4473   return DAG
4474       .getConstant(
4475           (int)C->getZExtValue() >> 2, SDLoc(Op),
4476           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4477       .getNode();
4478 }
4479 
namespace {

// Records one outgoing tail-call argument that must be spilled to the stack:
// the value itself plus the fixed stack slot it will be stored into.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index node used as the store address.
  int FrameIdx = 0;   // Raw frame index of the stack slot.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
4491 
4492 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4493 static void StoreTailCallArgumentsToStackSlot(
4494     SelectionDAG &DAG, SDValue Chain,
4495     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4496     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4497   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4498     SDValue Arg = TailCallArgs[i].Arg;
4499     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4500     int FI = TailCallArgs[i].FrameIdx;
4501     // Store relative to framepointer.
4502     MemOpChains.push_back(DAG.getStore(
4503         Chain, dl, Arg, FIN,
4504         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4505   }
4506 }
4507 
4508 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4509 /// the appropriate stack slot for the tail call optimized function call.
4510 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4511                                              SDValue OldRetAddr, SDValue OldFP,
4512                                              int SPDiff, const SDLoc &dl) {
4513   if (SPDiff) {
4514     // Calculate the new stack slot for the return address.
4515     MachineFunction &MF = DAG.getMachineFunction();
4516     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4517     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4518     bool isPPC64 = Subtarget.isPPC64();
4519     int SlotSize = isPPC64 ? 8 : 4;
4520     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4521     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4522                                                          NewRetAddrLoc, true);
4523     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4524     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4525     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4526                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4527 
4528     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4529     // slot as the FP is never overwritten.
4530     if (Subtarget.isDarwinABI()) {
4531       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4532       int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4533                                                          true);
4534       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4535       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4536                            MachinePointerInfo::getFixedStack(
4537                                DAG.getMachineFunction(), NewFPIdx));
4538     }
4539   }
4540   return Chain;
4541 }
4542 
4543 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4544 /// the position of the argument.
4545 static void
4546 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4547                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4548                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4549   int Offset = ArgOffset + SPDiff;
4550   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4551   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4552   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4553   SDValue FIN = DAG.getFrameIndex(FI, VT);
4554   TailCallArgumentInfo Info;
4555   Info.Arg = Arg;
4556   Info.FrameIdxOp = FIN;
4557   Info.FrameIdx = FI;
4558   TailCallArguments.push_back(Info);
4559 }
4560 
4561 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4562 /// stack slot. Returns the chain as result and the loaded frame pointers in
4563 /// LROpOut/FPOpout. Used when tail calling.
4564 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4565     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4566     SDValue &FPOpOut, const SDLoc &dl) const {
4567   if (SPDiff) {
4568     // Load the LR and FP stack slot for later adjusting.
4569     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4570     LROpOut = getReturnAddrFrameIndex(DAG);
4571     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4572     Chain = SDValue(LROpOut.getNode(), 1);
4573 
4574     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4575     // slot as the FP is never overwritten.
4576     if (Subtarget.isDarwinABI()) {
4577       FPOpOut = getFramePointerFrameIndex(DAG);
4578       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4579       Chain = SDValue(FPOpOut.getNode(), 1);
4580     }
4581   }
4582   return Chain;
4583 }
4584 
4585 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4586 /// by "Src" to address "Dst" of size "Size".  Alignment information is
4587 /// specified by the specific parameter attribute. The copy will be passed as
4588 /// a byval function parameter.
4589 /// Sometimes what we are copying is the end of a larger object, the part that
4590 /// does not fit in registers.
4591 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4592                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4593                                          SelectionDAG &DAG, const SDLoc &dl) {
4594   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4595   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4596                        false, false, false, MachinePointerInfo(),
4597                        MachinePointerInfo());
4598 }
4599 
4600 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4601 /// tail calls.
4602 static void LowerMemOpCallTo(
4603     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4604     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4605     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4606     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4607   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4608   if (!isTailCall) {
4609     if (isVector) {
4610       SDValue StackPtr;
4611       if (isPPC64)
4612         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4613       else
4614         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4615       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4616                            DAG.getConstant(ArgOffset, dl, PtrVT));
4617     }
4618     MemOpChains.push_back(
4619         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4620     // Calculate and remember argument location.
4621   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4622                                   TailCallArguments);
4623 }
4624 
4625 static void
4626 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4627                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4628                 SDValue FPOp,
4629                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4630   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4631   // might overwrite each other in case of tail call optimization.
4632   SmallVector<SDValue, 8> MemOpChains2;
4633   // Do not flag preceding copytoreg stuff together with the following stuff.
4634   InFlag = SDValue();
4635   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4636                                     MemOpChains2, dl);
4637   if (!MemOpChains2.empty())
4638     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4639 
4640   // Store the return address to the appropriate stack slot.
4641   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4642 
4643   // Emit callseq_end just before tailcall node.
4644   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4645                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4646   InFlag = Chain.getValue(1);
4647 }
4648 
4649 // Is this global address that of a function that can be called by name? (as
4650 // opposed to something that must hold a descriptor for an indirect call).
4651 static bool isFunctionGlobalAddress(SDValue Callee) {
4652   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4653     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4654         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4655       return false;
4656 
4657     return G->getGlobal()->getValueType()->isFunctionTy();
4658   }
4659 
4660   return false;
4661 }
4662 
/// PrepareCall - Resolve the callee and populate the operand list (Ops) and
/// result types (NodeTys) for the call node. Direct calls become
/// Target{GlobalAddress,ExternalSymbol} nodes; anything else is lowered as an
/// indirect call through CTR (MTCTR/BCTRL), including the function-descriptor
/// loads required by the 64-bit ELFv1 ABI. Returns the PPCISD call opcode to
/// use (PPCISD::CALL or PPCISD::BCTRL).
static unsigned
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
            bool isPatchPoint, bool hasNest,
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
            ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  // PC-relative references to external symbols should go through $stub, unless
  // we're building with the leopard linker or later, which automatically
  // synthesizes these stubs.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
  const GlobalValue *GV = nullptr;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
    GV = G->getGlobal();
  // Non-local callees on 32-bit ELF go through the PLT.
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;

  if (isFunctionGlobalAddress(Callee)) {
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
    // A call to a TLS address is actually an indirect call to a
    // thread-specific pointer.
    unsigned OpFlags = 0;
    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
    // every direct call is) turn it into a TargetGlobalAddress /
    // TargetExternalSymbol node so that legalize doesn't hack it.
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                        Callee.getValueType(), 0, OpFlags);
    needIndirectCall = false;
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (isPatchPoint) {
    // We'll form an invalid direct call when lowering a patchpoint; the full
    // sequence for an indirect call is complicated, and many of the
    // instructions introduced might have side effects (and, thus, can't be
    // removed later). The call itself will be removed as soon as the
    // argument/return lowering is complete, so the fact that it has the wrong
    // kind of operands should not really matter.
    needIndirectCall = false;
  }

  if (needIndirectCall) {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // The loads are scheduled at the beginning of the call sequence, and the
      // register copies are flagged together to ensure that no other
      // operations can be scheduled in between. E.g. without flagging the
      // copies together, a TOC access in the caller could be scheduled between
      // the assignment of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.

      // Load the address of the function entry point from the function
      // descriptor. Anchor the loads on the callseq_start's chain result,
      // skipping past its glue value if present.
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
      if (LDChain.getValueType() == MVT::Glue)
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);

      // When descriptors are invariant, mark the loads so they can be hoisted
      // or CSE'd.
      auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                          ? (MachineMemOperand::MODereferenceable |
                             MachineMemOperand::MOInvariant)
                          : MachineMemOperand::MONone;

      // Attribute the descriptor loads to the called value when it is known.
      MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
                                        /* Alignment = */ 8, MMOFlags);

      // Load environment pointer into r11.
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
                      /* Alignment = */ 8, MMOFlags);

      // Load the callee's TOC base (descriptor offset 8).
      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue TOCPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
                      /* Alignment = */ 8, MMOFlags);

      // Copy the callee TOC into r2, threading the glue so nothing can be
      // scheduled between the copies and the branch (see comment above).
      setUsesTOCBasePtr(DAG);
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
                                        InFlag);
      Chain = TOCVal.getValue(0);
      InFlag = TOCVal.getValue(1);

      // If the function call has an explicit 'nest' parameter, it takes the
      // place of the environment pointer.
      if (!hasNest) {
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                          InFlag);

        Chain = EnvVal.getValue(0);
        InFlag = EnvVal.getValue(1);
      }

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    // Move the (possibly descriptor-loaded) target address into CTR. Only
    // pass the glue operand if one exists.
    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    // Clear Callee so the direct-call operand below is not added.
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer)
    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  if (isSVR4ABI && isPPC64 && !isPatchPoint) {
    setUsesTOCBasePtr(DAG);
    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
  }

  return CallOpc;
}
4860 
4861 SDValue PPCTargetLowering::LowerCallResult(
4862     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4863     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4864     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4865   SmallVector<CCValAssign, 16> RVLocs;
4866   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4867                     *DAG.getContext());
4868   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4869 
4870   // Copy all of the result registers out of their specified physreg.
4871   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4872     CCValAssign &VA = RVLocs[i];
4873     assert(VA.isRegLoc() && "Can only return in registers!");
4874 
4875     SDValue Val = DAG.getCopyFromReg(Chain, dl,
4876                                      VA.getLocReg(), VA.getLocVT(), InFlag);
4877     Chain = Val.getValue(1);
4878     InFlag = Val.getValue(2);
4879 
4880     switch (VA.getLocInfo()) {
4881     default: llvm_unreachable("Unknown loc info!");
4882     case CCValAssign::Full: break;
4883     case CCValAssign::AExt:
4884       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4885       break;
4886     case CCValAssign::ZExt:
4887       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4888                         DAG.getValueType(VA.getValVT()));
4889       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4890       break;
4891     case CCValAssign::SExt:
4892       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4893                         DAG.getValueType(VA.getValVT()));
4894       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4895       break;
4896     }
4897 
4898     InVals.push_back(Val);
4899   }
4900 
4901   return Chain;
4902 }
4903 
4904 SDValue PPCTargetLowering::FinishCall(
4905     CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
4906     bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
4907     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
4908     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
4909     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
4910     SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
4911   std::vector<EVT> NodeTys;
4912   SmallVector<SDValue, 8> Ops;
4913   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4914                                  SPDiff, isTailCall, isPatchPoint, hasNest,
4915                                  RegsToPass, Ops, NodeTys, CS, Subtarget);
4916 
4917   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4918   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4919     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4920 
4921   // When performing tail call optimization the callee pops its arguments off
4922   // the stack. Account for this here so these bytes can be pushed back on in
4923   // PPCFrameLowering::eliminateCallFramePseudoInstr.
4924   int BytesCalleePops =
4925     (CallConv == CallingConv::Fast &&
4926      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4927 
4928   // Add a register mask operand representing the call-preserved registers.
4929   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4930   const uint32_t *Mask =
4931       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4932   assert(Mask && "Missing call preserved mask for calling convention");
4933   Ops.push_back(DAG.getRegisterMask(Mask));
4934 
4935   if (InFlag.getNode())
4936     Ops.push_back(InFlag);
4937 
4938   // Emit tail call.
4939   if (isTailCall) {
4940     assert(((Callee.getOpcode() == ISD::Register &&
4941              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4942             Callee.getOpcode() == ISD::TargetExternalSymbol ||
4943             Callee.getOpcode() == ISD::TargetGlobalAddress ||
4944             isa<ConstantSDNode>(Callee)) &&
4945     "Expecting an global address, external symbol, absolute value or register");
4946 
4947     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
4948     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4949   }
4950 
4951   // Add a NOP immediately after the branch instruction when using the 64-bit
4952   // SVR4 ABI. At link time, if caller and callee are in a different module and
4953   // thus have a different TOC, the call will be replaced with a call to a stub
4954   // function which saves the current TOC, loads the TOC of the callee and
4955   // branches to the callee. The NOP will be replaced with a load instruction
4956   // which restores the TOC of the caller from the TOC save slot of the current
4957   // stack frame. If caller and callee belong to the same module (and have the
4958   // same TOC), the NOP will remain unchanged.
4959 
4960   MachineFunction &MF = DAG.getMachineFunction();
4961   if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
4962       !isPatchPoint) {
4963     if (CallOpc == PPCISD::BCTRL) {
4964       // This is a call through a function pointer.
4965       // Restore the caller TOC from the save area into R2.
4966       // See PrepareCall() for more information about calls through function
4967       // pointers in the 64-bit SVR4 ABI.
4968       // We are using a target-specific load with r2 hard coded, because the
4969       // result of a target-independent load would never go directly into r2,
4970       // since r2 is a reserved register (which prevents the register allocator
4971       // from allocating it), resulting in an additional register being
4972       // allocated and an unnecessary move instruction being generated.
4973       CallOpc = PPCISD::BCTRL_LOAD_TOC;
4974 
4975       EVT PtrVT = getPointerTy(DAG.getDataLayout());
4976       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4977       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4978       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4979       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4980 
4981       // The address needs to go after the chain input but before the flag (or
4982       // any other variadic arguments).
4983       Ops.insert(std::next(Ops.begin()), AddTOC);
4984     } else if (CallOpc == PPCISD::CALL &&
4985       !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
4986       // Otherwise insert NOP for non-local calls.
4987       CallOpc = PPCISD::CALL_NOP;
4988     }
4989   }
4990 
4991   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
4992   InFlag = Chain.getValue(1);
4993 
4994   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4995                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
4996                              InFlag, dl);
4997   if (!Ins.empty())
4998     InFlag = Chain.getValue(1);
4999 
5000   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5001                          Ins, dl, DAG, InVals);
5002 }
5003 
// Top-level entry for lowering an outgoing call on PPC: decide whether the
// call can be emitted as a tail call, then dispatch to the ABI-specific
// lowering routine (32-bit SVR4, 64-bit SVR4, or Darwin).
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  // Unpack the CallLoweringInfo into locals for readability.
  SelectionDAG &DAG                     = CLI.DAG;
  SDLoc &dl                             = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  // Note: isTailCall aliases CLI.IsTailCall, so clearing it below informs
  // the caller that tail-call lowering was not performed.
  bool &isTailCall                      = CLI.IsTailCall;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool isVarArg                         = CLI.IsVarArg;
  bool isPatchPoint                     = CLI.IsPatchPoint;
  ImmutableCallSite *CS                 = CLI.CS;

  // Re-check tail-call eligibility with target-specific rules; the 64-bit
  // SVR4 ABI has its own (more permissive) check.
  if (isTailCall) {
    // Long-call mode forces indirect calls, which defeats tail calling
    // unless the source explicitly demands it via musttail.
    if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall()))
      isTailCall = false;
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall =
        IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
                                                 isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
    if (isTailCall) {
      // Bookkeeping: count tail calls; "sibling" calls are tail calls done
      // without the GuaranteedTailCallOpt machinery.
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      assert(isa<GlobalAddressSDNode>(Callee) &&
             "Callee should be an llvm::Function object.");
      DEBUG(
        const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
        const unsigned Width = 80 - strlen("TCO caller: ")
                                  - strlen(", callee linkage: 0, 0");
        dbgs() << "TCO caller: "
               << left_justify(DAG.getMachineFunction().getName(), Width)
               << ", callee linkage: "
               << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
      );
    }
  }

  // musttail calls must be tail-call lowered; failing to do so is a hard
  // error rather than a silent fallback to a normal call.
  if (!isTailCall && CS && CS->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  // Dispatch on ABI: SVR4 (ELF) 64-bit, SVR4 32-bit, or Darwin.
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
                          dl, DAG, InVals, CS);
}
5075 
// Lower an outgoing call using the 32-bit SVR4 (PowerPC ELF) ABI: assign
// arguments to registers / stack slots, copy by-value aggregates into the
// caller's frame outside the call sequence, set CR6 for vararg FP calls,
// and hand off to FinishCall to emit the call node itself.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  // 32-bit target: pointers (and stack slots) are 4 bytes.
  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrByteSize);
  // NOTE(review): under soft-float this pre-analysis appears to record which
  // operands were originally ppcf128 (cf. clearWasPPCF128() below) so the
  // calling convention can treat them specially — confirm in PPCCCState.
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      // CC_* routines return true when they FAIL to handle the argument.
      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
             << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i indexes ArgLocs/OutVals; j separately indexes ByValArgLocs (only
  // advanced for by-value aggregates).
  for (unsigned i = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.  The memcpy is chained onto the operand that precedes
      // CALLSEQ_START so it sits outside the call sequence.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    if (VA.isRegLoc()) {
      // i1 values are passed as zero-extended i32 in registers.
      if (Arg.getValueType() == MVT::i1)
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);

      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location; the actual stores are
        // emitted later by PrepareTailCall.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  // Merge all argument stores into a single token so the call depends on
  // all of them.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (isVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    // Only include the glue operand if one exists yet.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}
5292 
5293 // Copy an argument into memory, being careful to do this outside the
5294 // call sequence for the call to which the argument belongs.
5295 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5296     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5297     SelectionDAG &DAG, const SDLoc &dl) const {
5298   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5299                         CallSeqStart.getNode()->getOperand(0),
5300                         Flags, DAG, dl);
5301   // The MEMCPY must go outside the CALLSEQ_START..END.
5302   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5303   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5304                                                  SDLoc(MemcpyCall));
5305   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5306                          NewCallSeqStart.getNode());
5307   return NewCallSeqStart;
5308 }
5309 
5310 SDValue PPCTargetLowering::LowerCall_64SVR4(
5311     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5312     bool isTailCall, bool isPatchPoint,
5313     const SmallVectorImpl<ISD::OutputArg> &Outs,
5314     const SmallVectorImpl<SDValue> &OutVals,
5315     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5316     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5317     ImmutableCallSite *CS) const {
5318   bool isELFv2ABI = Subtarget.isELFv2ABI();
5319   bool isLittleEndian = Subtarget.isLittleEndian();
5320   unsigned NumOps = Outs.size();
5321   bool hasNest = false;
5322   bool IsSibCall = false;
5323 
5324   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5325   unsigned PtrByteSize = 8;
5326 
5327   MachineFunction &MF = DAG.getMachineFunction();
5328 
5329   if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5330     IsSibCall = true;
5331 
5332   // Mark this function as potentially containing a function that contains a
5333   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5334   // and restoring the callers stack pointer in this functions epilog. This is
5335   // done because by tail calling the called function might overwrite the value
5336   // in this function's (MF) stack pointer stack slot 0(SP).
5337   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5338       CallConv == CallingConv::Fast)
5339     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5340 
5341   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5342          "fastcc not supported on varargs functions");
5343 
5344   // Count how many bytes are to be pushed on the stack, including the linkage
5345   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5346   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5347   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5348   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5349   unsigned NumBytes = LinkageSize;
5350   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5351   unsigned &QFPR_idx = FPR_idx;
5352 
5353   static const MCPhysReg GPR[] = {
5354     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5355     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5356   };
5357   static const MCPhysReg VR[] = {
5358     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5359     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5360   };
5361 
5362   const unsigned NumGPRs = array_lengthof(GPR);
5363   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5364   const unsigned NumVRs  = array_lengthof(VR);
5365   const unsigned NumQFPRs = NumFPRs;
5366 
5367   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5368   // can be passed to the callee in registers.
5369   // For the fast calling convention, there is another check below.
5370   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5371   bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5372   if (!HasParameterArea) {
5373     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5374     unsigned AvailableFPRs = NumFPRs;
5375     unsigned AvailableVRs = NumVRs;
5376     unsigned NumBytesTmp = NumBytes;
5377     for (unsigned i = 0; i != NumOps; ++i) {
5378       if (Outs[i].Flags.isNest()) continue;
5379       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5380                                 PtrByteSize, LinkageSize, ParamAreaSize,
5381                                 NumBytesTmp, AvailableFPRs, AvailableVRs,
5382                                 Subtarget.hasQPX()))
5383         HasParameterArea = true;
5384     }
5385   }
5386 
5387   // When using the fast calling convention, we don't provide backing for
5388   // arguments that will be in registers.
5389   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5390 
5391   // Add up all the space actually used.
5392   for (unsigned i = 0; i != NumOps; ++i) {
5393     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5394     EVT ArgVT = Outs[i].VT;
5395     EVT OrigVT = Outs[i].ArgVT;
5396 
5397     if (Flags.isNest())
5398       continue;
5399 
5400     if (CallConv == CallingConv::Fast) {
5401       if (Flags.isByVal())
5402         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5403       else
5404         switch (ArgVT.getSimpleVT().SimpleTy) {
5405         default: llvm_unreachable("Unexpected ValueType for argument!");
5406         case MVT::i1:
5407         case MVT::i32:
5408         case MVT::i64:
5409           if (++NumGPRsUsed <= NumGPRs)
5410             continue;
5411           break;
5412         case MVT::v4i32:
5413         case MVT::v8i16:
5414         case MVT::v16i8:
5415         case MVT::v2f64:
5416         case MVT::v2i64:
5417         case MVT::v1i128:
5418           if (++NumVRsUsed <= NumVRs)
5419             continue;
5420           break;
5421         case MVT::v4f32:
5422           // When using QPX, this is handled like a FP register, otherwise, it
5423           // is an Altivec register.
5424           if (Subtarget.hasQPX()) {
5425             if (++NumFPRsUsed <= NumFPRs)
5426               continue;
5427           } else {
5428             if (++NumVRsUsed <= NumVRs)
5429               continue;
5430           }
5431           break;
5432         case MVT::f32:
5433         case MVT::f64:
5434         case MVT::v4f64: // QPX
5435         case MVT::v4i1:  // QPX
5436           if (++NumFPRsUsed <= NumFPRs)
5437             continue;
5438           break;
5439         }
5440     }
5441 
5442     /* Respect alignment of argument on the stack.  */
5443     unsigned Align =
5444       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5445     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5446 
5447     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5448     if (Flags.isInConsecutiveRegsLast())
5449       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5450   }
5451 
5452   unsigned NumBytesActuallyUsed = NumBytes;
5453 
5454   // In the old ELFv1 ABI,
5455   // the prolog code of the callee may store up to 8 GPR argument registers to
5456   // the stack, allowing va_start to index over them in memory if its varargs.
5457   // Because we cannot tell if this is needed on the caller side, we have to
5458   // conservatively assume that it is needed.  As such, make sure we have at
5459   // least enough stack space for the caller to store the 8 GPRs.
5460   // In the ELFv2 ABI, we allocate the parameter area iff a callee
5461   // really requires memory operands, e.g. a vararg function.
5462   if (HasParameterArea)
5463     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5464   else
5465     NumBytes = LinkageSize;
5466 
5467   // Tail call needs the stack to be aligned.
5468   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5469       CallConv == CallingConv::Fast)
5470     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5471 
5472   int SPDiff = 0;
5473 
5474   // Calculate by how many bytes the stack has to be adjusted in case of tail
5475   // call optimization.
5476   if (!IsSibCall)
5477     SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5478 
5479   // To protect arguments on the stack from being clobbered in a tail call,
5480   // force all the loads to happen before doing any other lowering.
5481   if (isTailCall)
5482     Chain = DAG.getStackArgumentTokenFactor(Chain);
5483 
5484   // Adjust the stack pointer for the new arguments...
5485   // These operations are automatically eliminated by the prolog/epilog pass
5486   if (!IsSibCall)
5487     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5488   SDValue CallSeqStart = Chain;
5489 
5490   // Load the return address and frame pointer so it can be move somewhere else
5491   // later.
5492   SDValue LROp, FPOp;
5493   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5494 
5495   // Set up a copy of the stack pointer for use loading and storing any
5496   // arguments that may not fit in the registers available for argument
5497   // passing.
5498   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5499 
5500   // Figure out which arguments are going to go in registers, and which in
5501   // memory.  Also, if this is a vararg function, floating point operations
5502   // must be stored to our stack, and loaded into integer regs as well, if
5503   // any integer regs are available for argument passing.
5504   unsigned ArgOffset = LinkageSize;
5505 
5506   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5507   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5508 
5509   SmallVector<SDValue, 8> MemOpChains;
5510   for (unsigned i = 0; i != NumOps; ++i) {
5511     SDValue Arg = OutVals[i];
5512     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5513     EVT ArgVT = Outs[i].VT;
5514     EVT OrigVT = Outs[i].ArgVT;
5515 
5516     // PtrOff will be used to store the current argument to the stack if a
5517     // register cannot be found for it.
5518     SDValue PtrOff;
5519 
5520     // We re-align the argument offset for each argument, except when using the
5521     // fast calling convention, when we need to make sure we do that only when
5522     // we'll actually use a stack slot.
5523     auto ComputePtrOff = [&]() {
5524       /* Respect alignment of argument on the stack.  */
5525       unsigned Align =
5526         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5527       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5528 
5529       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5530 
5531       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5532     };
5533 
5534     if (CallConv != CallingConv::Fast) {
5535       ComputePtrOff();
5536 
5537       /* Compute GPR index associated with argument offset.  */
5538       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5539       GPR_idx = std::min(GPR_idx, NumGPRs);
5540     }
5541 
5542     // Promote integers to 64-bit values.
5543     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5544       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5545       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5546       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5547     }
5548 
5549     // FIXME memcpy is used way more than necessary.  Correctness first.
5550     // Note: "by value" is code for passing a structure by value, not
5551     // basic types.
5552     if (Flags.isByVal()) {
5553       // Note: Size includes alignment padding, so
5554       //   struct x { short a; char b; }
5555       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5556       // These are the proper values we need for right-justifying the
5557       // aggregate in a parameter register.
5558       unsigned Size = Flags.getByValSize();
5559 
5560       // An empty aggregate parameter takes up no storage and no
5561       // registers.
5562       if (Size == 0)
5563         continue;
5564 
5565       if (CallConv == CallingConv::Fast)
5566         ComputePtrOff();
5567 
5568       // All aggregates smaller than 8 bytes must be passed right-justified.
5569       if (Size==1 || Size==2 || Size==4) {
5570         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5571         if (GPR_idx != NumGPRs) {
5572           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5573                                         MachinePointerInfo(), VT);
5574           MemOpChains.push_back(Load.getValue(1));
5575           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5576 
5577           ArgOffset += PtrByteSize;
5578           continue;
5579         }
5580       }
5581 
5582       if (GPR_idx == NumGPRs && Size < 8) {
5583         SDValue AddPtr = PtrOff;
5584         if (!isLittleEndian) {
5585           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5586                                           PtrOff.getValueType());
5587           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5588         }
5589         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5590                                                           CallSeqStart,
5591                                                           Flags, DAG, dl);
5592         ArgOffset += PtrByteSize;
5593         continue;
5594       }
5595       // Copy entire object into memory.  There are cases where gcc-generated
5596       // code assumes it is there, even if it could be put entirely into
5597       // registers.  (This is not what the doc says.)
5598 
5599       // FIXME: The above statement is likely due to a misunderstanding of the
5600       // documents.  All arguments must be copied into the parameter area BY
5601       // THE CALLEE in the event that the callee takes the address of any
5602       // formal argument.  That has not yet been implemented.  However, it is
5603       // reasonable to use the stack area as a staging area for the register
5604       // load.
5605 
5606       // Skip this for small aggregates, as we will use the same slot for a
5607       // right-justified copy, below.
5608       if (Size >= 8)
5609         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5610                                                           CallSeqStart,
5611                                                           Flags, DAG, dl);
5612 
5613       // When a register is available, pass a small aggregate right-justified.
5614       if (Size < 8 && GPR_idx != NumGPRs) {
5615         // The easiest way to get this right-justified in a register
5616         // is to copy the structure into the rightmost portion of a
5617         // local variable slot, then load the whole slot into the
5618         // register.
5619         // FIXME: The memcpy seems to produce pretty awful code for
5620         // small aggregates, particularly for packed ones.
5621         // FIXME: It would be preferable to use the slot in the
5622         // parameter save area instead of a new local variable.
5623         SDValue AddPtr = PtrOff;
5624         if (!isLittleEndian) {
5625           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5626           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5627         }
5628         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5629                                                           CallSeqStart,
5630                                                           Flags, DAG, dl);
5631 
5632         // Load the slot into the register.
5633         SDValue Load =
5634             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5635         MemOpChains.push_back(Load.getValue(1));
5636         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5637 
5638         // Done with this argument.
5639         ArgOffset += PtrByteSize;
5640         continue;
5641       }
5642 
5643       // For aggregates larger than PtrByteSize, copy the pieces of the
5644       // object that fit into registers from the parameter save area.
5645       for (unsigned j=0; j<Size; j+=PtrByteSize) {
5646         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5647         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5648         if (GPR_idx != NumGPRs) {
5649           SDValue Load =
5650               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5651           MemOpChains.push_back(Load.getValue(1));
5652           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5653           ArgOffset += PtrByteSize;
5654         } else {
5655           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5656           break;
5657         }
5658       }
5659       continue;
5660     }
5661 
5662     switch (Arg.getSimpleValueType().SimpleTy) {
5663     default: llvm_unreachable("Unexpected ValueType for argument!");
5664     case MVT::i1:
5665     case MVT::i32:
5666     case MVT::i64:
5667       if (Flags.isNest()) {
5668         // The 'nest' parameter, if any, is passed in R11.
5669         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5670         hasNest = true;
5671         break;
5672       }
5673 
5674       // These can be scalar arguments or elements of an integer array type
5675       // passed directly.  Clang may use those instead of "byval" aggregate
5676       // types to avoid forcing arguments to memory unnecessarily.
5677       if (GPR_idx != NumGPRs) {
5678         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5679       } else {
5680         if (CallConv == CallingConv::Fast)
5681           ComputePtrOff();
5682 
5683         assert(HasParameterArea &&
5684                "Parameter area must exist to pass an argument in memory.");
5685         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5686                          true, isTailCall, false, MemOpChains,
5687                          TailCallArguments, dl);
5688         if (CallConv == CallingConv::Fast)
5689           ArgOffset += PtrByteSize;
5690       }
5691       if (CallConv != CallingConv::Fast)
5692         ArgOffset += PtrByteSize;
5693       break;
5694     case MVT::f32:
5695     case MVT::f64: {
5696       // These can be scalar arguments or elements of a float array type
5697       // passed directly.  The latter are used to implement ELFv2 homogenous
5698       // float aggregates.
5699 
5700       // Named arguments go into FPRs first, and once they overflow, the
5701       // remaining arguments go into GPRs and then the parameter save area.
5702       // Unnamed arguments for vararg functions always go to GPRs and
5703       // then the parameter save area.  For now, put all arguments to vararg
5704       // routines always in both locations (FPR *and* GPR or stack slot).
5705       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5706       bool NeededLoad = false;
5707 
5708       // First load the argument into the next available FPR.
5709       if (FPR_idx != NumFPRs)
5710         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5711 
5712       // Next, load the argument into GPR or stack slot if needed.
5713       if (!NeedGPROrStack)
5714         ;
5715       else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5716         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5717         // once we support fp <-> gpr moves.
5718 
5719         // In the non-vararg case, this can only ever happen in the
5720         // presence of f32 array types, since otherwise we never run
5721         // out of FPRs before running out of GPRs.
5722         SDValue ArgVal;
5723 
5724         // Double values are always passed in a single GPR.
5725         if (Arg.getValueType() != MVT::f32) {
5726           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5727 
5728         // Non-array float values are extended and passed in a GPR.
5729         } else if (!Flags.isInConsecutiveRegs()) {
5730           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5731           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5732 
5733         // If we have an array of floats, we collect every odd element
5734         // together with its predecessor into one GPR.
5735         } else if (ArgOffset % PtrByteSize != 0) {
5736           SDValue Lo, Hi;
5737           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5738           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5739           if (!isLittleEndian)
5740             std::swap(Lo, Hi);
5741           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5742 
5743         // The final element, if even, goes into the first half of a GPR.
5744         } else if (Flags.isInConsecutiveRegsLast()) {
5745           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5746           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5747           if (!isLittleEndian)
5748             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5749                                  DAG.getConstant(32, dl, MVT::i32));
5750 
5751         // Non-final even elements are skipped; they will be handled
5752         // together the with subsequent argument on the next go-around.
5753         } else
5754           ArgVal = SDValue();
5755 
5756         if (ArgVal.getNode())
5757           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5758       } else {
5759         if (CallConv == CallingConv::Fast)
5760           ComputePtrOff();
5761 
5762         // Single-precision floating-point values are mapped to the
5763         // second (rightmost) word of the stack doubleword.
5764         if (Arg.getValueType() == MVT::f32 &&
5765             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5766           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5767           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5768         }
5769 
5770         assert(HasParameterArea &&
5771                "Parameter area must exist to pass an argument in memory.");
5772         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5773                          true, isTailCall, false, MemOpChains,
5774                          TailCallArguments, dl);
5775 
5776         NeededLoad = true;
5777       }
5778       // When passing an array of floats, the array occupies consecutive
5779       // space in the argument area; only round up to the next doubleword
5780       // at the end of the array.  Otherwise, each float takes 8 bytes.
5781       if (CallConv != CallingConv::Fast || NeededLoad) {
5782         ArgOffset += (Arg.getValueType() == MVT::f32 &&
5783                       Flags.isInConsecutiveRegs()) ? 4 : 8;
5784         if (Flags.isInConsecutiveRegsLast())
5785           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5786       }
5787       break;
5788     }
5789     case MVT::v4f32:
5790     case MVT::v4i32:
5791     case MVT::v8i16:
5792     case MVT::v16i8:
5793     case MVT::v2f64:
5794     case MVT::v2i64:
5795     case MVT::v1i128:
5796       if (!Subtarget.hasQPX()) {
5797       // These can be scalar arguments or elements of a vector array type
5798       // passed directly.  The latter are used to implement ELFv2 homogenous
5799       // vector aggregates.
5800 
5801       // For a varargs call, named arguments go into VRs or on the stack as
5802       // usual; unnamed arguments always go to the stack or the corresponding
5803       // GPRs when within range.  For now, we always put the value in both
5804       // locations (or even all three).
5805       if (isVarArg) {
5806         assert(HasParameterArea &&
5807                "Parameter area must exist if we have a varargs call.");
5808         // We could elide this store in the case where the object fits
5809         // entirely in R registers.  Maybe later.
5810         SDValue Store =
5811             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5812         MemOpChains.push_back(Store);
5813         if (VR_idx != NumVRs) {
5814           SDValue Load =
5815               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5816           MemOpChains.push_back(Load.getValue(1));
5817           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5818         }
5819         ArgOffset += 16;
5820         for (unsigned i=0; i<16; i+=PtrByteSize) {
5821           if (GPR_idx == NumGPRs)
5822             break;
5823           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5824                                    DAG.getConstant(i, dl, PtrVT));
5825           SDValue Load =
5826               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5827           MemOpChains.push_back(Load.getValue(1));
5828           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5829         }
5830         break;
5831       }
5832 
5833       // Non-varargs Altivec params go into VRs or on the stack.
5834       if (VR_idx != NumVRs) {
5835         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5836       } else {
5837         if (CallConv == CallingConv::Fast)
5838           ComputePtrOff();
5839 
5840         assert(HasParameterArea &&
5841                "Parameter area must exist to pass an argument in memory.");
5842         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5843                          true, isTailCall, true, MemOpChains,
5844                          TailCallArguments, dl);
5845         if (CallConv == CallingConv::Fast)
5846           ArgOffset += 16;
5847       }
5848 
5849       if (CallConv != CallingConv::Fast)
5850         ArgOffset += 16;
5851       break;
5852       } // not QPX
5853 
5854       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5855              "Invalid QPX parameter type");
5856 
5857       /* fall through */
5858     case MVT::v4f64:
5859     case MVT::v4i1: {
5860       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5861       if (isVarArg) {
5862         assert(HasParameterArea &&
5863                "Parameter area must exist if we have a varargs call.");
5864         // We could elide this store in the case where the object fits
5865         // entirely in R registers.  Maybe later.
5866         SDValue Store =
5867             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5868         MemOpChains.push_back(Store);
5869         if (QFPR_idx != NumQFPRs) {
5870           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
5871                                      PtrOff, MachinePointerInfo());
5872           MemOpChains.push_back(Load.getValue(1));
5873           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5874         }
5875         ArgOffset += (IsF32 ? 16 : 32);
5876         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5877           if (GPR_idx == NumGPRs)
5878             break;
5879           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5880                                    DAG.getConstant(i, dl, PtrVT));
5881           SDValue Load =
5882               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5883           MemOpChains.push_back(Load.getValue(1));
5884           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5885         }
5886         break;
5887       }
5888 
5889       // Non-varargs QPX params go into registers or on the stack.
5890       if (QFPR_idx != NumQFPRs) {
5891         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5892       } else {
5893         if (CallConv == CallingConv::Fast)
5894           ComputePtrOff();
5895 
5896         assert(HasParameterArea &&
5897                "Parameter area must exist to pass an argument in memory.");
5898         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5899                          true, isTailCall, true, MemOpChains,
5900                          TailCallArguments, dl);
5901         if (CallConv == CallingConv::Fast)
5902           ArgOffset += (IsF32 ? 16 : 32);
5903       }
5904 
5905       if (CallConv != CallingConv::Fast)
5906         ArgOffset += (IsF32 ? 16 : 32);
5907       break;
5908       }
5909     }
5910   }
5911 
5912   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
5913          "mismatch in size of parameter area");
5914   (void)NumBytesActuallyUsed;
5915 
5916   if (!MemOpChains.empty())
5917     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5918 
5919   // Check if this is an indirect call (MTCTR/BCTRL).
5920   // See PrepareCall() for more information about calls through function
5921   // pointers in the 64-bit SVR4 ABI.
5922   if (!isTailCall && !isPatchPoint &&
5923       !isFunctionGlobalAddress(Callee) &&
5924       !isa<ExternalSymbolSDNode>(Callee)) {
5925     // Load r2 into a virtual register and store it to the TOC save area.
5926     setUsesTOCBasePtr(DAG);
5927     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5928     // TOC save area offset.
5929     unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5930     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5931     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5932     Chain = DAG.getStore(
5933         Val.getValue(1), dl, Val, AddPtr,
5934         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
5935     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5936     // This does not mean the MTCTR instruction must use R12; it's easier
5937     // to model this as an extra parameter, so do that.
5938     if (isELFv2ABI && !isPatchPoint)
5939       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5940   }
5941 
5942   // Build a sequence of copy-to-reg nodes chained together with token chain
5943   // and flag operands which copy the outgoing args into the appropriate regs.
5944   SDValue InFlag;
5945   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5946     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5947                              RegsToPass[i].second, InFlag);
5948     InFlag = Chain.getValue(1);
5949   }
5950 
5951   if (isTailCall && !IsSibCall)
5952     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5953                     TailCallArguments);
5954 
5955   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
5956                     DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
5957                     SPDiff, NumBytes, Ins, InVals, CS);
5958 }
5959 
/// Lower an outgoing call for the 32/64-bit Darwin PowerPC ABI.
///
/// Walks the outgoing arguments (\p Outs / \p OutVals), assigning each to
/// GPRs, FPRs, and Altivec VRs in ABI order, and emitting stores into the
/// parameter save area for anything that does not fit in registers (or that
/// must be shadowed in memory for varargs).  Produces the CALLSEQ_START /
/// copy-to-reg / call node sequence via FinishCall.
///
/// Darwin-specific quirks handled here:
///  - FPR arguments also consume GPR slots (1 for f32, 2 for f64 on ppc32).
///  - In 32-bit non-varargs calls, Altivec vector parameters are placed in
///    a 16-byte-aligned region *after* all non-Altivec parameters.
///  - For indirect calls, the callee address is additionally passed in R12.
SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  // Number of outgoing argument values (Outs and OutVals run in parallel).
  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    // Align the start of the Altivec region, then reserve 16 bytes apiece.
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be move somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  // Cursors into the GPR/FPR/VR argument-register arrays below; advanced as
  // registers are consumed.
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  // 13 FPR argument registers; the FPR array itself is declared outside this
  // function (shared with the other LowerCall_* implementations).
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // (physreg, value) pairs to be glued into the call via CopyToReg below.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  // Chains of the stores/loads emitted for in-memory arguments; token-factored
  // together after the loop.
  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          // Load the 1- or 2-byte aggregate directly into a GPR (extended to
          // pointer width), skipping the stack copy entirely.
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          // No GPR left: memcpy the object into the rightmost bytes of its
          // pointer-sized stack slot (right justification).
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: account for the stack space of the remaining tail
          // (rounded up to a whole number of pointer-sized slots) and stop.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        // i1 has no register class of its own here; widen to pointer width.
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          // Varargs FP arguments must also be materialized in memory (and in
          // GPRs when available) so va_arg can find them.
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          // On ppc32, an f64 spans two GPRs: reload the second word as well.
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 !=0) {
          // Skip to 16-byte alignment, burning the GPRs that shadow the
          // padding bytes.
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          // Reload the slot as v4f32; every vector type in this case is 16
          // bytes, and only the raw bits matter for the register copy.
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Also shadow the vector in as many GPRs as remain (word-by-word
        // reloads from the slot just stored).
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      // NOTE: if nAltivecParamsAtEnd != 0 and we ran out of VRs, the store is
      // deferred to the "Altivec params at end" loop below.
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers emit
  // the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        // Only the Altivec params that overflowed the VR file get stores.
        if (++j > NumVRs) {
          // PtrOff is deliberately left null here; for vector arguments
          // LowerMemOpCallTo computes the address from SP + ArgOffset itself.
          // NOTE(review): verify against LowerMemOpCallTo's isVector path.
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  // Tie all the argument stores/loads together into a single chain.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  // hasNest is a 64-bit ELFv1-only concern, so pass false here.
  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}
6341 
6342 bool
6343 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6344                                   MachineFunction &MF, bool isVarArg,
6345                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
6346                                   LLVMContext &Context) const {
6347   SmallVector<CCValAssign, 16> RVLocs;
6348   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6349   return CCInfo.CheckReturn(Outs, RetCC_PPC);
6350 }
6351 
6352 SDValue
6353 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6354                                bool isVarArg,
6355                                const SmallVectorImpl<ISD::OutputArg> &Outs,
6356                                const SmallVectorImpl<SDValue> &OutVals,
6357                                const SDLoc &dl, SelectionDAG &DAG) const {
6358   SmallVector<CCValAssign, 16> RVLocs;
6359   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6360                  *DAG.getContext());
6361   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
6362 
6363   SDValue Flag;
6364   SmallVector<SDValue, 4> RetOps(1, Chain);
6365 
6366   // Copy the result values into the output registers.
6367   for (unsigned i = 0; i != RVLocs.size(); ++i) {
6368     CCValAssign &VA = RVLocs[i];
6369     assert(VA.isRegLoc() && "Can only return in registers!");
6370 
6371     SDValue Arg = OutVals[i];
6372 
6373     switch (VA.getLocInfo()) {
6374     default: llvm_unreachable("Unknown loc info!");
6375     case CCValAssign::Full: break;
6376     case CCValAssign::AExt:
6377       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6378       break;
6379     case CCValAssign::ZExt:
6380       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6381       break;
6382     case CCValAssign::SExt:
6383       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6384       break;
6385     }
6386 
6387     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6388     Flag = Chain.getValue(1);
6389     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6390   }
6391 
6392   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6393   const MCPhysReg *I =
6394     TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6395   if (I) {
6396     for (; *I; ++I) {
6397 
6398       if (PPC::G8RCRegClass.contains(*I))
6399         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6400       else if (PPC::F8RCRegClass.contains(*I))
6401         RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6402       else if (PPC::CRRCRegClass.contains(*I))
6403         RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6404       else if (PPC::VRRCRegClass.contains(*I))
6405         RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6406       else
6407         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6408     }
6409   }
6410 
6411   RetOps[0] = Chain;  // Update chain.
6412 
6413   // Add the flag if we have it.
6414   if (Flag.getNode())
6415     RetOps.push_back(Flag);
6416 
6417   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6418 }
6419 
6420 SDValue
6421 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6422                                                 SelectionDAG &DAG) const {
6423   SDLoc dl(Op);
6424 
6425   // Get the corect type for integers.
6426   EVT IntVT = Op.getValueType();
6427 
6428   // Get the inputs.
6429   SDValue Chain = Op.getOperand(0);
6430   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6431   // Build a DYNAREAOFFSET node.
6432   SDValue Ops[2] = {Chain, FPSIdx};
6433   SDVTList VTs = DAG.getVTList(IntVT);
6434   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6435 }
6436 
6437 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6438                                              SelectionDAG &DAG) const {
6439   // When we pop the dynamic allocation we need to restore the SP link.
6440   SDLoc dl(Op);
6441 
6442   // Get the corect type for pointers.
6443   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6444 
6445   // Construct the stack pointer operand.
6446   bool isPPC64 = Subtarget.isPPC64();
6447   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6448   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6449 
6450   // Get the operands for the STACKRESTORE.
6451   SDValue Chain = Op.getOperand(0);
6452   SDValue SaveSP = Op.getOperand(1);
6453 
6454   // Load the old link SP.
6455   SDValue LoadLinkSP =
6456       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6457 
6458   // Restore the stack pointer.
6459   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6460 
6461   // Store the old link SP.
6462   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6463 }
6464 
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index; it is created lazily on
  // first use.  (NOTE: earlier comments here said "frame pointer save
  // index" — that was a copy-paste from getFramePointerFrameIndex.)
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the link-register save word in the frame.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area
    // (pointer-sized: 8 bytes on PPC64, 4 on PPC32).
    RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
    // Save the result so subsequent queries reuse the same slot.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}
6486 
6487 SDValue
6488 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6489   MachineFunction &MF = DAG.getMachineFunction();
6490   bool isPPC64 = Subtarget.isPPC64();
6491   EVT PtrVT = getPointerTy(MF.getDataLayout());
6492 
6493   // Get current frame pointer save index.  The users of this index will be
6494   // primarily DYNALLOC instructions.
6495   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6496   int FPSI = FI->getFramePointerSaveIndex();
6497 
6498   // If the frame pointer save index hasn't been defined yet.
6499   if (!FPSI) {
6500     // Find out what the fix offset of the frame pointer save area.
6501     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6502     // Allocate the frame index for frame pointer save area.
6503     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6504     // Save the result.
6505     FI->setFramePointerSaveIndex(FPSI);
6506   }
6507   return DAG.getFrameIndex(FPSI, PtrVT);
6508 }
6509 
6510 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6511                                                    SelectionDAG &DAG) const {
6512   // Get the inputs.
6513   SDValue Chain = Op.getOperand(0);
6514   SDValue Size  = Op.getOperand(1);
6515   SDLoc dl(Op);
6516 
6517   // Get the corect type for pointers.
6518   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6519   // Negate the size.
6520   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6521                                 DAG.getConstant(0, dl, PtrVT), Size);
6522   // Construct a node for the frame pointer save index.
6523   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6524   // Build a DYNALLOC node.
6525   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
6526   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
6527   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
6528 }
6529 
6530 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
6531                                                      SelectionDAG &DAG) const {
6532   MachineFunction &MF = DAG.getMachineFunction();
6533 
6534   bool isPPC64 = Subtarget.isPPC64();
6535   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6536 
6537   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
6538   return DAG.getFrameIndex(FI, PtrVT);
6539 }
6540 
6541 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
6542                                                SelectionDAG &DAG) const {
6543   SDLoc DL(Op);
6544   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
6545                      DAG.getVTList(MVT::i32, MVT::Other),
6546                      Op.getOperand(0), Op.getOperand(1));
6547 }
6548 
6549 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
6550                                                 SelectionDAG &DAG) const {
6551   SDLoc DL(Op);
6552   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
6553                      Op.getOperand(0), Op.getOperand(1));
6554 }
6555 
6556 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
6557   if (Op.getValueType().isVector())
6558     return LowerVectorLoad(Op, DAG);
6559 
6560   assert(Op.getValueType() == MVT::i1 &&
6561          "Custom lowering only for i1 loads");
6562 
6563   // First, load 8 bits into 32 bits, then truncate to 1 bit.
6564 
6565   SDLoc dl(Op);
6566   LoadSDNode *LD = cast<LoadSDNode>(Op);
6567 
6568   SDValue Chain = LD->getChain();
6569   SDValue BasePtr = LD->getBasePtr();
6570   MachineMemOperand *MMO = LD->getMemOperand();
6571 
6572   SDValue NewLD =
6573       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
6574                      BasePtr, MVT::i8, MMO);
6575   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
6576 
6577   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
6578   return DAG.getMergeValues(Ops, dl);
6579 }
6580 
6581 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
6582   if (Op.getOperand(1).getValueType().isVector())
6583     return LowerVectorStore(Op, DAG);
6584 
6585   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
6586          "Custom lowering only for i1 stores");
6587 
6588   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
6589 
6590   SDLoc dl(Op);
6591   StoreSDNode *ST = cast<StoreSDNode>(Op);
6592 
6593   SDValue Chain = ST->getChain();
6594   SDValue BasePtr = ST->getBasePtr();
6595   SDValue Value = ST->getValue();
6596   MachineMemOperand *MMO = ST->getMemOperand();
6597 
6598   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
6599                       Value);
6600   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
6601 }
6602 
6603 // FIXME: Remove this once the ANDI glue bug is fixed:
6604 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
6605   assert(Op.getValueType() == MVT::i1 &&
6606          "Custom lowering only for i1 results");
6607 
6608   SDLoc DL(Op);
6609   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
6610                      Op.getOperand(0));
6611 }
6612 
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
///
/// fsel natively computes (A >= 0) ? B : C, so every supported condition
/// is rewritten as a sign test of either the LHS (when RHS is 0.0) or of
/// LHS-RHS / RHS-LHS.  Several switch cases below intentionally fall
/// through after swapping the select arms.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
  // TODO: Propagate flags from the select rather than global settings.
  SDNodeFlags Flags;
  Flags.setNoInfs(true);
  Flags.setNoNaNs(true);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      // NE is EQ with the select arms swapped; intentional fallthrough.
      std::swap(TV, FV);
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // LHS == 0 iff both LHS >= 0 and -LHS >= 0; hence two nested fsels.
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      // Intentional fallthrough into the SETGE handling.
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      // Intentional fallthrough into the SETLE handling.
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // LHS <= 0 iff -LHS >= 0.
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: materialize LHS-RHS (or RHS-LHS) and fsel on its sign.
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    // NE is EQ with the select arms swapped; intentional fallthrough.
    std::swap(TV, FV);
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    // Cmp == 0 iff both Cmp >= 0 and -Cmp >= 0; hence two nested fsels.
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    // LHS < RHS iff !(LHS-RHS >= 0) under no-NaNs; arms in reverse order.
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    // LHS >= RHS iff LHS-RHS >= 0.
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    // LHS > RHS iff !(RHS-LHS >= 0) under no-NaNs; arms in reverse order.
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    // LHS <= RHS iff RHS-LHS >= 0.
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}
6716 
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  // Lower FP_TO_SINT/FP_TO_UINT by converting in an FP register and then
  // storing the result to a stack slot; RLI is filled in so the caller
  // (or a later reuse, see canReuseLoadAddress) can issue the integer load.
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  // The conversion instructions operate on f64; widen f32 sources first.
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Unsigned word conversion requires FCTIWUZ (FPCVT); without it, fall
    // back to the doubleword form FCTIDZ.
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  // STFIWX stores the low 32 bits of an FP register directly, so an i32
  // result can use an i32-sized stack slot when the instruction is usable.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    // STFIWX is modeled as a target memory intrinsic storing 4 bytes.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  // NOTE(review): the +4 pointer bias is applied unconditionally while the
  // MPI offset is endian-dependent — presumably this branch is only reached
  // on big-endian subtargets (LE implies STFIWX/FPCVT); verify.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  // Hand the store chain, slot pointer, and pointer info to the caller.
  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}
6776 
6777 /// \brief Custom lowers floating point to integer conversions to use
6778 /// the direct move instructions available in ISA 2.07 to avoid the
6779 /// need for load/store combinations.
6780 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
6781                                                     SelectionDAG &DAG,
6782                                                     const SDLoc &dl) const {
6783   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6784   SDValue Src = Op.getOperand(0);
6785 
6786   if (Src.getValueType() == MVT::f32)
6787     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6788 
6789   SDValue Tmp;
6790   switch (Op.getSimpleValueType().SimpleTy) {
6791   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6792   case MVT::i32:
6793     Tmp = DAG.getNode(
6794         Op.getOpcode() == ISD::FP_TO_SINT
6795             ? PPCISD::FCTIWZ
6796             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6797         dl, MVT::f64, Src);
6798     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
6799     break;
6800   case MVT::i64:
6801     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6802            "i64 FP_TO_UINT is supported only with FPCVT");
6803     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6804                                                         PPCISD::FCTIDUZ,
6805                       dl, MVT::f64, Src);
6806     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
6807     break;
6808   }
6809   return Tmp;
6810 }
6811 
6812 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
6813                                           const SDLoc &dl) const {
6814   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6815     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
6816 
6817   ReuseLoadInfo RLI;
6818   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6819 
6820   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6821                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6822 }
6823 
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
//
// Returns true and fills in RLI if Op's memory location can be reused for
// a new load of memory type MemVT with extension type ET.
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  // FP-to-int conversions are lowered through a stack slot; that slot's
  // address can be reused directly for a non-extending load.
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  // Otherwise Op itself must be a plain, matching load we can piggyback on.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    // Fold the pre-increment offset into the reused pointer.
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  // Copy everything the new load needs to be memory-equivalent to the old.
  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  // The old load's chain result (result 2 for indexed loads, else 1) must
  // be spliced with the new load's chain by the caller via spliceIntoChain.
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
6873 
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  // Nothing to splice when there is no old chain to preserve.
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  // Build the token factor with an UNDEF placeholder first, so that the
  // RAUW below cannot rewrite the token factor's own operand when it
  // replaces uses of ResChain.
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  // Redirect all users of the old chain to the token factor, then patch
  // the placeholder so the token factor joins both chains.
  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
6894 
6895 /// \brief Analyze profitability of direct move
6896 /// prefer float load to int load plus direct move
6897 /// when there is no integer use of int load
6898 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
6899   SDNode *Origin = Op.getOperand(0).getNode();
6900   if (Origin->getOpcode() != ISD::LOAD)
6901     return true;
6902 
6903   // If there is no LXSIBZX/LXSIHZX, like Power8,
6904   // prefer direct move if the memory size is 1 or 2 bytes.
6905   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
6906   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
6907     return true;
6908 
6909   for (SDNode::use_iterator UI = Origin->use_begin(),
6910                             UE = Origin->use_end();
6911        UI != UE; ++UI) {
6912 
6913     // Only look at the users of the loaded value.
6914     if (UI.getUse().get().getResNo() != 0)
6915       continue;
6916 
6917     if (UI->getOpcode() != ISD::SINT_TO_FP &&
6918         UI->getOpcode() != ISD::UINT_TO_FP)
6919       return true;
6920   }
6921 
6922   return false;
6923 }
6924 
6925 /// \brief Custom lowers integer to floating point conversions to use
6926 /// the direct move instructions available in ISA 2.07 to avoid the
6927 /// need for load/store combinations.
6928 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
6929                                                     SelectionDAG &DAG,
6930                                                     const SDLoc &dl) const {
6931   assert((Op.getValueType() == MVT::f32 ||
6932           Op.getValueType() == MVT::f64) &&
6933          "Invalid floating point type as target of conversion");
6934   assert(Subtarget.hasFPCVT() &&
6935          "Int to FP conversions with direct moves require FPCVT");
6936   SDValue FP;
6937   SDValue Src = Op.getOperand(0);
6938   bool SinglePrec = Op.getValueType() == MVT::f32;
6939   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
6940   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
6941   unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6942                              (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6943 
6944   if (WordInt) {
6945     FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6946                      dl, MVT::f64, Src);
6947     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6948   }
6949   else {
6950     FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6951     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6952   }
6953 
6954   return FP;
6955 }
6956 
6957 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
6958                                           SelectionDAG &DAG) const {
6959   SDLoc dl(Op);
6960 
6961   if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6962     if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6963       return SDValue();
6964 
6965     SDValue Value = Op.getOperand(0);
6966     // The values are now known to be -1 (false) or 1 (true). To convert this
6967     // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
6968     // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
6969     Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
6970 
6971     SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
6972 
6973     Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
6974 
6975     if (Op.getValueType() != MVT::v4f64)
6976       Value = DAG.getNode(ISD::FP_ROUND, dl,
6977                           Op.getValueType(), Value,
6978                           DAG.getIntPtrConstant(1, dl));
6979     return Value;
6980   }
6981 
6982   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
6983   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
6984     return SDValue();
6985 
6986   if (Op.getOperand(0).getValueType() == MVT::i1)
6987     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
6988                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
6989                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
6990 
6991   // If we have direct moves, we can do all the conversion, skip the store/load
6992   // however, without FPCVT we can't do most conversions.
6993   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
6994       Subtarget.isPPC64() && Subtarget.hasFPCVT())
6995     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
6996 
6997   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
6998          "UINT_TO_FP is supported only with FPCVT");
6999 
7000   // If we have FCFIDS, then use it when converting to single-precision.
7001   // Otherwise, convert to double-precision and then round.
7002   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7003                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7004                                                             : PPCISD::FCFIDS)
7005                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7006                                                             : PPCISD::FCFID);
7007   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7008                   ? MVT::f32
7009                   : MVT::f64;
7010 
7011   if (Op.getOperand(0).getValueType() == MVT::i64) {
7012     SDValue SINT = Op.getOperand(0);
7013     // When converting to single-precision, we actually need to convert
7014     // to double-precision first and then round to single-precision.
7015     // To avoid double-rounding effects during that operation, we have
7016     // to prepare the input operand.  Bits that might be truncated when
7017     // converting to double-precision are replaced by a bit that won't
7018     // be lost at this stage, but is below the single-precision rounding
7019     // position.
7020     //
7021     // However, if -enable-unsafe-fp-math is in effect, accept double
7022     // rounding to avoid the extra overhead.
7023     if (Op.getValueType() == MVT::f32 &&
7024         !Subtarget.hasFPCVT() &&
7025         !DAG.getTarget().Options.UnsafeFPMath) {
7026 
7027       // Twiddle input to make sure the low 11 bits are zero.  (If this
7028       // is the case, we are guaranteed the value will fit into the 53 bit
7029       // mantissa of an IEEE double-precision value without rounding.)
7030       // If any of those low 11 bits were not zero originally, make sure
7031       // bit 12 (value 2048) is set instead, so that the final rounding
7032       // to single-precision gets the correct result.
7033       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7034                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
7035       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
7036                           Round, DAG.getConstant(2047, dl, MVT::i64));
7037       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
7038       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7039                           Round, DAG.getConstant(-2048, dl, MVT::i64));
7040 
7041       // However, we cannot use that value unconditionally: if the magnitude
7042       // of the input value is small, the bit-twiddling we did above might
7043       // end up visibly changing the output.  Fortunately, in that case, we
7044       // don't need to twiddle bits since the original input will convert
7045       // exactly to double-precision floating-point already.  Therefore,
7046       // construct a conditional to use the original value if the top 11
7047       // bits are all sign-bit copies, and use the rounded value computed
7048       // above otherwise.
7049       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
7050                                  SINT, DAG.getConstant(53, dl, MVT::i32));
7051       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
7052                          Cond, DAG.getConstant(1, dl, MVT::i64));
7053       Cond = DAG.getSetCC(dl, MVT::i32,
7054                           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
7055 
7056       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
7057     }
7058 
7059     ReuseLoadInfo RLI;
7060     SDValue Bits;
7061 
7062     MachineFunction &MF = DAG.getMachineFunction();
7063     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7064       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7065                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7066       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7067     } else if (Subtarget.hasLFIWAX() &&
7068                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7069       MachineMemOperand *MMO =
7070         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7071                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7072       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7073       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
7074                                      DAG.getVTList(MVT::f64, MVT::Other),
7075                                      Ops, MVT::i32, MMO);
7076       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7077     } else if (Subtarget.hasFPCVT() &&
7078                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7079       MachineMemOperand *MMO =
7080         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7081                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7082       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7083       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
7084                                      DAG.getVTList(MVT::f64, MVT::Other),
7085                                      Ops, MVT::i32, MMO);
7086       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7087     } else if (((Subtarget.hasLFIWAX() &&
7088                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7089                 (Subtarget.hasFPCVT() &&
7090                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7091                SINT.getOperand(0).getValueType() == MVT::i32) {
7092       MachineFrameInfo &MFI = MF.getFrameInfo();
7093       EVT PtrVT = getPointerTy(DAG.getDataLayout());
7094 
7095       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7096       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7097 
7098       SDValue Store =
7099           DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
7100                        MachinePointerInfo::getFixedStack(
7101                            DAG.getMachineFunction(), FrameIdx));
7102 
7103       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7104              "Expected an i32 store");
7105 
7106       RLI.Ptr = FIdx;
7107       RLI.Chain = Store;
7108       RLI.MPI =
7109           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7110       RLI.Alignment = 4;
7111 
7112       MachineMemOperand *MMO =
7113         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7114                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7115       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7116       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7117                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
7118                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
7119                                      Ops, MVT::i32, MMO);
7120     } else
7121       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
7122 
7123     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
7124 
7125     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7126       FP = DAG.getNode(ISD::FP_ROUND, dl,
7127                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
7128     return FP;
7129   }
7130 
7131   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
7132          "Unhandled INT_TO_FP type in custom expander!");
7133   // Since we only generate this in 64-bit mode, we can take advantage of
7134   // 64-bit registers.  In particular, sign extend the input value into the
7135   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
7136   // then lfd it and fcfid it.
7137   MachineFunction &MF = DAG.getMachineFunction();
7138   MachineFrameInfo &MFI = MF.getFrameInfo();
7139   EVT PtrVT = getPointerTy(MF.getDataLayout());
7140 
7141   SDValue Ld;
7142   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7143     ReuseLoadInfo RLI;
7144     bool ReusingLoad;
7145     if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
7146                                             DAG))) {
7147       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7148       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7149 
7150       SDValue Store =
7151           DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7152                        MachinePointerInfo::getFixedStack(
7153                            DAG.getMachineFunction(), FrameIdx));
7154 
7155       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7156              "Expected an i32 store");
7157 
7158       RLI.Ptr = FIdx;
7159       RLI.Chain = Store;
7160       RLI.MPI =
7161           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7162       RLI.Alignment = 4;
7163     }
7164 
7165     MachineMemOperand *MMO =
7166       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7167                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7168     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7169     Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7170                                    PPCISD::LFIWZX : PPCISD::LFIWAX,
7171                                  dl, DAG.getVTList(MVT::f64, MVT::Other),
7172                                  Ops, MVT::i32, MMO);
7173     if (ReusingLoad)
7174       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
7175   } else {
7176     assert(Subtarget.isPPC64() &&
7177            "i32->FP without LFIWAX supported only on PPC64");
7178 
7179     int FrameIdx = MFI.CreateStackObject(8, 8, false);
7180     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7181 
7182     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
7183                                 Op.getOperand(0));
7184 
7185     // STD the extended value into the stack slot.
7186     SDValue Store = DAG.getStore(
7187         DAG.getEntryNode(), dl, Ext64, FIdx,
7188         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7189 
7190     // Load the value as a double.
7191     Ld = DAG.getLoad(
7192         MVT::f64, dl, Store, FIdx,
7193         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7194   }
7195 
7196   // FCFID it and return it.
7197   SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
7198   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7199     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
7200                      DAG.getIntPtrConstant(0, dl));
7201   return FP;
7202 }
7203 
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  // Note: despite the name, 'Chain' is the f64 *value* produced by MFFS (the
  // FPSCR contents materialized into an FP register); it is the stored value
  // below, while the store itself is chained off the entry node.
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
                               MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  // NOTE(review): the +4 byte offset picks out the low word of the stored f64
  // only under a big-endian layout — presumably intentional for PPC; confirm
  // behavior on little-endian subtargets.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());

  // Transform as necessary
  // CWD1 = FPSCR & 0x3   (the raw 2-bit rounding-mode field)
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  // CWD2 = ((FPSCR ^ 0x3) & 0x3) >> 1, i.e. (~FPSCR & 0x3) >> 1 from the
  // formula in the comment block above.
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // The computation is done in i32; adjust to the requested result type.
  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
7266 
7267 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7268   EVT VT = Op.getValueType();
7269   unsigned BitWidth = VT.getSizeInBits();
7270   SDLoc dl(Op);
7271   assert(Op.getNumOperands() == 3 &&
7272          VT == Op.getOperand(1).getValueType() &&
7273          "Unexpected SHL!");
7274 
7275   // Expand into a bunch of logical ops.  Note that these ops
7276   // depend on the PPC behavior for oversized shift amounts.
7277   SDValue Lo = Op.getOperand(0);
7278   SDValue Hi = Op.getOperand(1);
7279   SDValue Amt = Op.getOperand(2);
7280   EVT AmtVT = Amt.getValueType();
7281 
7282   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7283                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7284   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
7285   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
7286   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
7287   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7288                              DAG.getConstant(-BitWidth, dl, AmtVT));
7289   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7290   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7291   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7292   SDValue OutOps[] = { OutLo, OutHi };
7293   return DAG.getMergeValues(OutOps, dl);
7294 }
7295 
7296 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7297   EVT VT = Op.getValueType();
7298   SDLoc dl(Op);
7299   unsigned BitWidth = VT.getSizeInBits();
7300   assert(Op.getNumOperands() == 3 &&
7301          VT == Op.getOperand(1).getValueType() &&
7302          "Unexpected SRL!");
7303 
7304   // Expand into a bunch of logical ops.  Note that these ops
7305   // depend on the PPC behavior for oversized shift amounts.
7306   SDValue Lo = Op.getOperand(0);
7307   SDValue Hi = Op.getOperand(1);
7308   SDValue Amt = Op.getOperand(2);
7309   EVT AmtVT = Amt.getValueType();
7310 
7311   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7312                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7313   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7314   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7315   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7316   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7317                              DAG.getConstant(-BitWidth, dl, AmtVT));
7318   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
7319   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7320   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
7321   SDValue OutOps[] = { OutLo, OutHi };
7322   return DAG.getMergeValues(OutOps, dl);
7323 }
7324 
7325 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
7326   SDLoc dl(Op);
7327   EVT VT = Op.getValueType();
7328   unsigned BitWidth = VT.getSizeInBits();
7329   assert(Op.getNumOperands() == 3 &&
7330          VT == Op.getOperand(1).getValueType() &&
7331          "Unexpected SRA!");
7332 
7333   // Expand into a bunch of logical ops, followed by a select_cc.
7334   SDValue Lo = Op.getOperand(0);
7335   SDValue Hi = Op.getOperand(1);
7336   SDValue Amt = Op.getOperand(2);
7337   EVT AmtVT = Amt.getValueType();
7338 
7339   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7340                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7341   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7342   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7343   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7344   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7345                              DAG.getConstant(-BitWidth, dl, AmtVT));
7346   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
7347   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
7348   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
7349                                   Tmp4, Tmp6, ISD::SETLE);
7350   SDValue OutOps[] = { OutLo, OutHi };
7351   return DAG.getMergeValues(OutOps, dl);
7352 }
7353 
7354 //===----------------------------------------------------------------------===//
7355 // Vector related lowering.
7356 //
7357 
7358 /// BuildSplatI - Build a canonical splati of Val with an element size of
7359 /// SplatSize.  Cast the result to VT.
7360 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
7361                            SelectionDAG &DAG, const SDLoc &dl) {
7362   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
7363 
7364   static const MVT VTys[] = { // canonical VT to use for each size.
7365     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
7366   };
7367 
7368   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
7369 
7370   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
7371   if (Val == -1)
7372     SplatSize = 1;
7373 
7374   EVT CanonicalVT = VTys[SplatSize-1];
7375 
7376   // Build a canonical splat for this value.
7377   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
7378 }
7379 
7380 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
7381 /// specified intrinsic ID.
7382 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
7383                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
7384   if (DestVT == MVT::Other) DestVT = Op.getValueType();
7385   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7386                      DAG.getConstant(IID, dl, MVT::i32), Op);
7387 }
7388 
7389 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
7390 /// specified intrinsic ID.
7391 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
7392                                 SelectionDAG &DAG, const SDLoc &dl,
7393                                 EVT DestVT = MVT::Other) {
7394   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
7395   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7396                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
7397 }
7398 
7399 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
7400 /// specified intrinsic ID.
7401 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
7402                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
7403                                 EVT DestVT = MVT::Other) {
7404   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
7405   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7406                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
7407 }
7408 
7409 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
7410 /// amount.  The result has the specified value type.
7411 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
7412                            SelectionDAG &DAG, const SDLoc &dl) {
7413   // Force LHS/RHS to be the right type.
7414   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
7415   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
7416 
7417   int Ops[16];
7418   for (unsigned i = 0; i != 16; ++i)
7419     Ops[i] = i + Amt;
7420   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
7421   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7422 }
7423 
7424 /// Do we have an efficient pattern in a .td file for this node?
7425 ///
7426 /// \param V - pointer to the BuildVectorSDNode being matched
7427 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
7428 ///
7429 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
7430 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
7431 /// the opposite is true (expansion is beneficial) are:
7432 /// - The node builds a vector out of integers that are not 32 or 64-bits
7433 /// - The node builds a vector out of constants
7434 /// - The node is a "load-and-splat"
7435 /// In all other cases, we will choose to keep the BUILD_VECTOR.
7436 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
7437                                             bool HasDirectMove) {
7438   EVT VecVT = V->getValueType(0);
7439   bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 ||
7440     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
7441   if (!RightType)
7442     return false;
7443 
7444   bool IsSplat = true;
7445   bool IsLoad = false;
7446   SDValue Op0 = V->getOperand(0);
7447 
7448   // This function is called in a block that confirms the node is not a constant
7449   // splat. So a constant BUILD_VECTOR here means the vector is built out of
7450   // different constants.
7451   if (V->isConstant())
7452     return false;
7453   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
7454     if (V->getOperand(i).isUndef())
7455       return false;
7456     // We want to expand nodes that represent load-and-splat even if the
7457     // loaded value is a floating point truncation or conversion to int.
7458     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
7459         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
7460          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7461         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
7462          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7463         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
7464          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
7465       IsLoad = true;
7466     // If the operands are different or the input is not a load and has more
7467     // uses than just this BV node, then it isn't a splat.
7468     if (V->getOperand(i) != Op0 ||
7469         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
7470       IsSplat = false;
7471   }
7472   return !(IsSplat && IsLoad);
7473 }
7474 
7475 // If this is a case we can't handle, return null and let the default
7476 // expansion code take care of it.  If we CAN select this case, and if it
7477 // selects to a single instruction, return Op.  Otherwise, if we can codegen
7478 // this case more efficiently than a constant pool load, lower it to the
7479 // sequence of ops that should be used.
7480 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
7481                                              SelectionDAG &DAG) const {
7482   SDLoc dl(Op);
7483   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7484   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
7485 
7486   if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
7487     // We first build an i32 vector, load it into a QPX register,
7488     // then convert it to a floating-point vector and compare it
7489     // to a zero vector to get the boolean result.
7490     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7491     int FrameIdx = MFI.CreateStackObject(16, 16, false);
7492     MachinePointerInfo PtrInfo =
7493         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7494     EVT PtrVT = getPointerTy(DAG.getDataLayout());
7495     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7496 
7497     assert(BVN->getNumOperands() == 4 &&
7498       "BUILD_VECTOR for v4i1 does not have 4 operands");
7499 
7500     bool IsConst = true;
7501     for (unsigned i = 0; i < 4; ++i) {
7502       if (BVN->getOperand(i).isUndef()) continue;
7503       if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
7504         IsConst = false;
7505         break;
7506       }
7507     }
7508 
7509     if (IsConst) {
7510       Constant *One =
7511         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
7512       Constant *NegOne =
7513         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
7514 
7515       Constant *CV[4];
7516       for (unsigned i = 0; i < 4; ++i) {
7517         if (BVN->getOperand(i).isUndef())
7518           CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
7519         else if (isNullConstant(BVN->getOperand(i)))
7520           CV[i] = NegOne;
7521         else
7522           CV[i] = One;
7523       }
7524 
7525       Constant *CP = ConstantVector::get(CV);
7526       SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
7527                                           16 /* alignment */);
7528 
7529       SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
7530       SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
7531       return DAG.getMemIntrinsicNode(
7532           PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
7533           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7534     }
7535 
7536     SmallVector<SDValue, 4> Stores;
7537     for (unsigned i = 0; i < 4; ++i) {
7538       if (BVN->getOperand(i).isUndef()) continue;
7539 
7540       unsigned Offset = 4*i;
7541       SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7542       Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7543 
7544       unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
7545       if (StoreSize > 4) {
7546         Stores.push_back(
7547             DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
7548                               PtrInfo.getWithOffset(Offset), MVT::i32));
7549       } else {
7550         SDValue StoreValue = BVN->getOperand(i);
7551         if (StoreSize < 4)
7552           StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
7553 
7554         Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
7555                                       PtrInfo.getWithOffset(Offset)));
7556       }
7557     }
7558 
7559     SDValue StoreChain;
7560     if (!Stores.empty())
7561       StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7562     else
7563       StoreChain = DAG.getEntryNode();
7564 
7565     // Now load from v4i32 into the QPX register; this will extend it to
7566     // v4i64 but not yet convert it to a floating point. Nevertheless, this
7567     // is typed as v4f64 because the QPX register integer states are not
7568     // explicitly represented.
7569 
7570     SDValue Ops[] = {StoreChain,
7571                      DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
7572                      FIdx};
7573     SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
7574 
7575     SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
7576       dl, VTs, Ops, MVT::v4i32, PtrInfo);
7577     LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7578       DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
7579       LoadedVect);
7580 
7581     SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
7582 
7583     return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
7584   }
7585 
7586   // All other QPX vectors are handled by generic code.
7587   if (Subtarget.hasQPX())
7588     return SDValue();
7589 
7590   // Check if this is a splat of a constant value.
7591   APInt APSplatBits, APSplatUndef;
7592   unsigned SplatBitSize;
7593   bool HasAnyUndefs;
7594   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
7595                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
7596       SplatBitSize > 32) {
7597     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
7598     // lowered to VSX instructions under certain conditions.
7599     // Without VSX, there is no pattern more efficient than expanding the node.
7600     if (Subtarget.hasVSX() &&
7601         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove()))
7602       return Op;
7603     return SDValue();
7604   }
7605 
7606   unsigned SplatBits = APSplatBits.getZExtValue();
7607   unsigned SplatUndef = APSplatUndef.getZExtValue();
7608   unsigned SplatSize = SplatBitSize / 8;
7609 
7610   // First, handle single instruction cases.
7611 
7612   // All zeros?
7613   if (SplatBits == 0) {
7614     // Canonicalize all zero vectors to be v4i32.
7615     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
7616       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
7617       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
7618     }
7619     return Op;
7620   }
7621 
7622   // We have XXSPLTIB for constant splats one byte wide
7623   if (Subtarget.hasP9Vector() && SplatSize == 1) {
7624     // This is a splat of 1-byte elements with some elements potentially undef.
7625     // Rather than trying to match undef in the SDAG patterns, ensure that all
7626     // elements are the same constant.
7627     if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
7628       SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
7629                                                        dl, MVT::i32));
7630       SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
7631       if (Op.getValueType() != MVT::v16i8)
7632         return DAG.getBitcast(Op.getValueType(), NewBV);
7633       return NewBV;
7634     }
7635     return Op;
7636   }
7637 
7638   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
7639   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
7640                     (32-SplatBitSize));
7641   if (SextVal >= -16 && SextVal <= 15)
7642     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
7643 
7644   // Two instruction sequences.
7645 
7646   // If this value is in the range [-32,30] and is even, use:
7647   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
7648   // If this value is in the range [17,31] and is odd, use:
7649   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
7650   // If this value is in the range [-31,-17] and is odd, use:
7651   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
7652   // Note the last two are three-instruction sequences.
7653   if (SextVal >= -32 && SextVal <= 31) {
7654     // To avoid having these optimizations undone by constant folding,
7655     // we convert to a pseudo that will be expanded later into one of
7656     // the above forms.
7657     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
7658     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
7659               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
7660     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
7661     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
7662     if (VT == Op.getValueType())
7663       return RetVal;
7664     else
7665       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
7666   }
7667 
7668   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
7669   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
7670   // for fneg/fabs.
7671   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
7672     // Make -1 and vspltisw -1:
7673     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
7674 
7675     // Make the VSLW intrinsic, computing 0x8000_0000.
7676     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
7677                                    OnesV, DAG, dl);
7678 
7679     // xor by OnesV to invert it.
7680     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
7681     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7682   }
7683 
7684   // Check to see if this is a wide variety of vsplti*, binop self cases.
7685   static const signed char SplatCsts[] = {
7686     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
7687     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
7688   };
7689 
7690   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
7691     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
7692     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
7693     int i = SplatCsts[idx];
7694 
7695     // Figure out what shift amount will be used by altivec if shifted by i in
7696     // this splat size.
7697     unsigned TypeShiftAmt = i & (SplatBitSize-1);
7698 
7699     // vsplti + shl self.
7700     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
7701       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7702       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7703         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
7704         Intrinsic::ppc_altivec_vslw
7705       };
7706       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7707       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7708     }
7709 
7710     // vsplti + srl self.
7711     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7712       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7713       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7714         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
7715         Intrinsic::ppc_altivec_vsrw
7716       };
7717       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7718       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7719     }
7720 
7721     // vsplti + sra self.
7722     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7723       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7724       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7725         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
7726         Intrinsic::ppc_altivec_vsraw
7727       };
7728       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7729       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7730     }
7731 
7732     // vsplti + rol self.
7733     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7734                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
7735       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7736       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7737         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
7738         Intrinsic::ppc_altivec_vrlw
7739       };
7740       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7741       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7742     }
7743 
7744     // t = vsplti c, result = vsldoi t, t, 1
7745     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7746       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7747       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7748       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7749     }
7750     // t = vsplti c, result = vsldoi t, t, 2
7751     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7752       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7753       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7754       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7755     }
7756     // t = vsplti c, result = vsldoi t, t, 3
7757     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
7758       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7759       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
7760       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7761     }
7762   }
7763 
7764   return SDValue();
7765 }
7766 
7767 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7768 /// the specified operations to build the shuffle.
7769 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7770                                       SDValue RHS, SelectionDAG &DAG,
7771                                       const SDLoc &dl) {
7772   unsigned OpNum = (PFEntry >> 26) & 0x0F;
7773   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7774   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
7775 
7776   enum {
7777     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7778     OP_VMRGHW,
7779     OP_VMRGLW,
7780     OP_VSPLTISW0,
7781     OP_VSPLTISW1,
7782     OP_VSPLTISW2,
7783     OP_VSPLTISW3,
7784     OP_VSLDOI4,
7785     OP_VSLDOI8,
7786     OP_VSLDOI12
7787   };
7788 
7789   if (OpNum == OP_COPY) {
7790     if (LHSID == (1*9+2)*9+3) return LHS;
7791     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7792     return RHS;
7793   }
7794 
7795   SDValue OpLHS, OpRHS;
7796   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7797   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7798 
7799   int ShufIdxs[16];
7800   switch (OpNum) {
7801   default: llvm_unreachable("Unknown i32 permute!");
7802   case OP_VMRGHW:
7803     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
7804     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
7805     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
7806     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
7807     break;
7808   case OP_VMRGLW:
7809     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
7810     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
7811     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
7812     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
7813     break;
7814   case OP_VSPLTISW0:
7815     for (unsigned i = 0; i != 16; ++i)
7816       ShufIdxs[i] = (i&3)+0;
7817     break;
7818   case OP_VSPLTISW1:
7819     for (unsigned i = 0; i != 16; ++i)
7820       ShufIdxs[i] = (i&3)+4;
7821     break;
7822   case OP_VSPLTISW2:
7823     for (unsigned i = 0; i != 16; ++i)
7824       ShufIdxs[i] = (i&3)+8;
7825     break;
7826   case OP_VSPLTISW3:
7827     for (unsigned i = 0; i != 16; ++i)
7828       ShufIdxs[i] = (i&3)+12;
7829     break;
7830   case OP_VSLDOI4:
7831     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
7832   case OP_VSLDOI8:
7833     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
7834   case OP_VSLDOI12:
7835     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
7836   }
7837   EVT VT = OpLHS.getValueType();
7838   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
7839   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
7840   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
7841   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7842 }
7843 
7844 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
7845 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
7846 /// return the code it can be lowered into.  Worst case, it can always be
7847 /// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Strategy: try progressively more general lowerings, in order.
  //   1. P9/VSX single-instruction patterns (xxinsertw, xxsldwi, xxpermdi,
  //      byte reversals, word splats, 8-byte swaps).
  //   2. QPX-specific lowerings.
  //   3. Shuffles matchable by AltiVec permute-immediate instructions are
  //      left as VECTOR_SHUFFLE for the instruction selector.
  //   4. 4-byte-element shuffles via the perfect-shuffle table (BE only).
  //   5. Fallback: a vperm with a constant-pool mask.
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  // Out-params filled in by the PPC:: mask classifiers below; Swap means the
  // matcher wants the inputs exchanged before emitting the pattern.
  unsigned ShiftElts, InsertAtByte;
  bool Swap;
  // P9: word insert (xxinsertw), optionally rotating the inserted source
  // first with a vecshl when ShiftElts is nonzero.
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }


  // VSX: word-granularity double-vector shift (xxsldwi).
  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  // VSX: doubleword permute (xxpermdi); ShiftElts carries the DM immediate.
  if (Subtarget.hasVSX() &&
    PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  // P9 byte reversals: XXREVERSE at halfword/word/doubleword/quadword
  // granularity, selected by bitcasting to the matching element width.
  if (Subtarget.hasP9Vector()) {
     if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    // Word splat from a single input (xxspltw).
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

      // If the source for the shuffle is a scalar_to_vector that came from a
      // 32-bit load, it will have used LXVWSX so we don't need to splat again.
      // NOTE(review): V1.getOperand(0) is taken unconditionally here, which
      // assumes V1 has at least one operand (presumably a bitcast wrapping
      // the scalar_to_vector); confirm leaf nodes cannot reach this point.
      if (Subtarget.hasP9Vector() &&
          ((isLittleEndian && SplatIdx == 3) ||
           (!isLittleEndian && SplatIdx == 0))) {
        SDValue Src = V1.getOperand(0);
        if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
            Src.getOperand(0).getOpcode() == ISD::LOAD &&
            Src.getOperand(0).hasOneUse())
          return V1;
      }
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  // QPX handles only 4-element vectors; anything matched here bypasses the
  // AltiVec paths below entirely.
  if (Subtarget.hasQPX()) {
    if (VT.getVectorNumElements() != 4)
      return SDValue();

    if (V2.isUndef()) V2 = V1;

    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
    if (AlignIdx != -1) {
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
    } else if (SVOp->isSplat()) {
      // Splat indices 4-7 refer to the second input; normalize to the first.
      int SplatIdx = SVOp->getSplatIndex();
      if (SplatIdx >= 4) {
        std::swap(V1, V2);
        SplatIdx -= 4;
      }

      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
    }

    // Lower this into a qvgpci/qvfperm pair.

    // Compute the qvgpci literal: each of the 4 mask elements is packed into
    // a 3-bit field (undef lanes default to their own index).
    unsigned idx = 0;
    for (unsigned i = 0; i < 4; ++i) {
      int m = SVOp->getMaskElt(i);
      unsigned mm = m >= 0 ? (unsigned) m : i;
      idx |= mm << (3-i)*3;
    }

    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
                             DAG.getConstant(idx, dl, MVT::i32));
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  // PFIndexes[i] records which 4-byte source element feeds destination
  // element i; 8 is the sentinel for "entirely undef".
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      // All four bytes must come in order from the same source word.
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}
8124 
8125 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
8126 /// vector comparison.  If it is, return true and fill in Opc/isDot with
8127 /// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  // CompareOpc is emitted verbatim as the immediate operand of the
  // PPCISD::VCMP / VCMPo node built by LowerINTRINSIC_WO_CHAIN; the values
  // below are presumably the instructions' encoded opcode fields (TODO:
  // confirm against the Power ISA vector compare encodings).
  // Predicate (_p) intrinsics set isDot = true (record form, writes CR6);
  // plain comparisons leave isDot = false.
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  // Assume failure until a case below matches.
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword compares require P8 Altivec; reject on older subtargets.
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // The not-equal family is P9-only.
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  }
  return true;
}
8371 
8372 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
8373 /// lower, do it, otherwise return null.
8374 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8375                                                    SelectionDAG &DAG) const {
8376   unsigned IntrinsicID =
8377     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8378 
8379   if (IntrinsicID == Intrinsic::thread_pointer) {
8380     // Reads the thread pointer register, used for __builtin_thread_pointer.
8381     if (Subtarget.isPPC64())
8382       return DAG.getRegister(PPC::X13, MVT::i64);
8383     return DAG.getRegister(PPC::R2, MVT::i32);
8384   }
8385 
8386   // If this is a lowered altivec predicate compare, CompareOpc is set to the
8387   // opcode number of the comparison.
8388   SDLoc dl(Op);
8389   int CompareOpc;
8390   bool isDot;
8391   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
8392     return SDValue();    // Don't custom lower most intrinsics.
8393 
8394   // If this is a non-dot comparison, make the VCMP node and we are done.
8395   if (!isDot) {
8396     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
8397                               Op.getOperand(1), Op.getOperand(2),
8398                               DAG.getConstant(CompareOpc, dl, MVT::i32));
8399     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
8400   }
8401 
8402   // Create the PPCISD altivec 'dot' comparison node.
8403   SDValue Ops[] = {
8404     Op.getOperand(2),  // LHS
8405     Op.getOperand(3),  // RHS
8406     DAG.getConstant(CompareOpc, dl, MVT::i32)
8407   };
8408   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
8409   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
8410 
8411   // Now that we have the comparison, emit a copy from the CR to a GPR.
8412   // This is flagged to the above dot comparison.
8413   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
8414                                 DAG.getRegister(PPC::CR6, MVT::i32),
8415                                 CompNode.getValue(1));
8416 
8417   // Unpack the result based on how the target uses it.
8418   unsigned BitNo;   // Bit # of CR6.
8419   bool InvertBit;   // Invert result?
8420   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
8421   default:  // Can't happen, don't crash on invalid number though.
8422   case 0:   // Return the value of the EQ bit of CR6.
8423     BitNo = 0; InvertBit = false;
8424     break;
8425   case 1:   // Return the inverted value of the EQ bit of CR6.
8426     BitNo = 0; InvertBit = true;
8427     break;
8428   case 2:   // Return the value of the LT bit of CR6.
8429     BitNo = 2; InvertBit = false;
8430     break;
8431   case 3:   // Return the inverted value of the LT bit of CR6.
8432     BitNo = 2; InvertBit = true;
8433     break;
8434   }
8435 
8436   // Shift the bit into the low position.
8437   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
8438                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
8439   // Isolate the bit.
8440   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
8441                       DAG.getConstant(1, dl, MVT::i32));
8442 
8443   // If we are supposed to, toggle the bit.
8444   if (InvertBit)
8445     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
8446                         DAG.getConstant(1, dl, MVT::i32));
8447   return Flags;
8448 }
8449 
8450 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
8451                                                SelectionDAG &DAG) const {
8452   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
8453   // the beginning of the argument list.
8454   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
8455   SDLoc DL(Op);
8456   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
8457   case Intrinsic::ppc_cfence: {
8458     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
8459     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
8460     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
8461                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
8462                                                   Op.getOperand(ArgStart + 1)),
8463                                       Op.getOperand(0)),
8464                    0);
8465   }
8466   default:
8467     break;
8468   }
8469   return SDValue();
8470 }
8471 
8472 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
8473   // Check for a DIV with the same operands as this REM.
8474   for (auto UI : Op.getOperand(1)->uses()) {
8475     if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
8476         (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
8477       if (UI->getOperand(0) == Op.getOperand(0) &&
8478           UI->getOperand(1) == Op.getOperand(1))
8479         return SDValue();
8480   }
8481   return Op;
8482 }
8483 
8484 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
8485                                                   SelectionDAG &DAG) const {
8486   SDLoc dl(Op);
8487   // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
8488   // instructions), but for smaller types, we need to first extend up to v2i32
8489   // before doing going farther.
8490   if (Op.getValueType() == MVT::v2i64) {
8491     EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
8492     if (ExtVT != MVT::v2i32) {
8493       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
8494       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
8495                        DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
8496                                         ExtVT.getVectorElementType(), 4)));
8497       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
8498       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
8499                        DAG.getValueType(MVT::v2i32));
8500     }
8501 
8502     return Op;
8503   }
8504 
8505   return SDValue();
8506 }
8507 
8508 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
8509                                                  SelectionDAG &DAG) const {
8510   SDLoc dl(Op);
8511   // Create a stack slot that is 16-byte aligned.
8512   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8513   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8514   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8515   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8516 
8517   // Store the input value into Value#0 of the stack slot.
8518   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
8519                                MachinePointerInfo());
8520   // Load it out.
8521   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
8522 }
8523 
8524 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8525                                                   SelectionDAG &DAG) const {
8526   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
8527          "Should only be called for ISD::INSERT_VECTOR_ELT");
8528   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
8529   // We have legal lowering for constant indices but not for variable ones.
8530   if (C)
8531     return Op;
8532   return SDValue();
8533 }
8534 
// Custom lowering of EXTRACT_VECTOR_ELT for QPX v4i1 boolean vectors: the
// vector is converted to floating point, normalized to 0.0/1.0, converted to
// integer words through a stack slot, and the requested 32-bit word is loaded
// back.
SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  // Round-trip through a 16-byte stack slot: qvstfiw writes the converted
  // words so individual 32-bit elements can be re-loaded below.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested. The element index must be a constant here;
  // each element occupies 4 bytes in the slot.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal =
      DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));

  // Without CR bits, the i32 load result is returned directly; with CR bits
  // the expected result type is i1, so truncate.
  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}
8593 
/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  // Note: in addition to v4i1, this hook also handles under-aligned
  // v4f64/v4f32 loads by scalarizing them into four element loads.
  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    // Load the four scalar elements individually, collecting the chains for
    // a final TokenFactor.
    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      // Use an extending load when the in-register scalar type is wider than
      // the in-memory scalar type; otherwise a plain load suffices.
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      // Only the first element load re-creates the pre-increment form of an
      // indexed load; the remaining elements use plain offset arithmetic.
      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    // An indexed load also produces the updated base pointer as an extra
    // result value.
    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    // Each element is an any-extending i8 load producing an i32.
    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}
8681 
/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  // Note: in addition to v4i1, this hook also handles under-aligned
  // v4f64/v4f32 stores by scalarizing them into four element stores.
  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    // Extract each of the four elements and store them individually.
    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      // Use a truncating store when the in-register scalar type is wider
      // than the in-memory scalar type.
      if (ScalarVT != ScalarMemVT)
        Store =
            DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                              SN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
      else
        Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
                             SN->getPointerInfo().getWithOffset(Idx * Stride),
                             MinAlign(Alignment, Idx * Stride),
                             SN->getMemOperand()->getFlags(), SN->getAAInfo());

      // Only the first element store re-creates the pre-increment form of an
      // indexed store; the remaining elements use plain offset arithmetic.
      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // An indexed store also produces the updated base pointer as an extra
    // result value.
    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  // Spill the converted words to a 16-byte stack slot via qvstfiw so they
  // can be re-loaded as individual i32 words below.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array.
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                           PtrInfo.getWithOffset(Offset));
    LoadChains[i] = Loads[i].getValue(1);
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  // Truncate each reloaded i32 word to a byte and store it into the
  // destination byte array.
  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
        SN->getAAInfo());
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}
8808 
// Custom lowering for vector MUL, composed from the AltiVec even/odd
// multiply and multiply-sum intrinsics.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, dl, MVT::v4i32);

    // Multiply-sum of LHS with the halfword-rotated RHS; the zero splat is
    // the addend operand.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    // vmladduhm with a zero addend yields the plain per-halfword product.
    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
8879 
/// LowerOperation - Provide custom lowering hooks for some operations.
/// Most cases simply forward to a dedicated Lower* helper for the opcode.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);

  // Variable argument handling.
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG);

  case ISD::VACOPY:
    return LowerVACOPY(Op, DAG);

  // Stack management.
  case ISD::STACKRESTORE:
    return LowerSTACKRESTORE(Op, DAG);

  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);

  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling.
  case ISD::EH_DWARF_CFA:
    return LowerEH_DWARF_CFA(Op, DAG);

  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  // Memory and conversions.
  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                      SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::SREM:
  case ISD::UREM:
    return LowerREM(Op, DAG);
  }
}
8957 
/// ReplaceNodeResults - Custom type legalization hook: compute replacement
/// values for the results of node N and push them onto Results.
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    // Split the cycle counter into two i32 halves plus the chain.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    // Re-issue the intrinsic with the legal setcc result type in place of
    // the illegal i1 result.
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(NewInt);
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    // Only 32-bit SVR4 gets custom VAARG result legalization here.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      // NOTE(review): this passes result #1 of the node (SDValue(N, 1)) to
      // LowerVAARG rather than result #0 -- confirm the value number is
      // intentional.
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_ROUND_INREG: {
    assert(N->getValueType(0) == MVT::ppcf128);
    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
    // Split the ppcf128 operand into its two f64 halves.
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(0, dl));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(1, dl));

    // Add the two halves of the long double in round-to-zero mode.
    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

    // We know the low half is about to be thrown away, so just use something
    // convenient.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                FPreg, FPreg));
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  }
}
9033 
9034 //===----------------------------------------------------------------------===//
9035 //  Other Lowering Code
9036 //===----------------------------------------------------------------------===//
9037 
9038 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
9039   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
9040   Function *Func = Intrinsic::getDeclaration(M, Id);
9041   return Builder.CreateCall(Func, {});
9042 }
9043 
9044 // The mappings for emitLeading/TrailingFence is taken from
9045 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
9046 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
9047                                                  Instruction *Inst,
9048                                                  AtomicOrdering Ord) const {
9049   if (Ord == AtomicOrdering::SequentiallyConsistent)
9050     return callIntrinsic(Builder, Intrinsic::ppc_sync);
9051   if (isReleaseOrStronger(Ord))
9052     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9053   return nullptr;
9054 }
9055 
9056 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
9057                                                   Instruction *Inst,
9058                                                   AtomicOrdering Ord) const {
9059   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
9060     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
9061     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
9062     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
9063     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
9064       return Builder.CreateCall(
9065           Intrinsic::getDeclaration(
9066               Builder.GetInsertBlock()->getParent()->getParent(),
9067               Intrinsic::ppc_cfence, {Inst->getType()}),
9068           {Inst});
9069     // FIXME: Can use isync for rmw operation.
9070     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9071   }
9072   return nullptr;
9073 }
9074 
// Emit the machine-level loop implementing an atomic binary operation of the
// given size using load-reserve / store-conditional pairs. BinOpcode == 0
// indicates ATOMIC_SWAP; a nonzero CmpOpcode/CmpPred selects the min/max
// form that conditionally skips the store.
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // Pick the load-reserve/store-conditional pair matching the access size.
  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // Create the loop block(s) and exit block, and move everything after MI
  // out of the current block into the exit block.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // With no binary op (swap) the stored value is simply incr; otherwise a
  // scratch register of the appropriate width holds the computed result.
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                           : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // For max/min...
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] incr, dest
  //   bgt exitMBB
  //  loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  if (CmpOpcode) {
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      unsigned ExtReg =  RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg).addReg(dest);
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(ExtReg);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(dest);

    // If the comparison says the value needn't change, skip the store and
    // exit the loop.
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  // Store-conditional the result; if the reservation was lost, retry.
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
9193 
// Expand a pseudo 8-/16-bit atomic read-modify-write into an explicit
// lwarx/stwcx. loop operating on the aligned 32-bit word that contains the
// partword operand.  This also handles ATOMIC_SWAP (BinOpcode == 0) and the
// min/max family (CmpOpcode != 0, paired with CmpPred).
//
//   MI        - the ATOMIC_* pseudo being expanded; operands are
//               (dest, ptrA, ptrB, incr).
//   BB        - the block containing MI; control flow is split around it.
//   is8bit    - true for a byte operation, false for a halfword.
//   BinOpcode - opcode applied to (incr2, loaded value); 0 means plain swap.
//   CmpOpcode - comparison opcode for min/max variants; 0 means none.
//   CmpPred   - branch predicate used with CmpOpcode (exit loop when taken).
//
// Returns the block into which subsequent instructions should be inserted.
MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit, // operation
                                            unsigned BinOpcode,
                                            unsigned CmpOpcode,
                                            unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
                            CmpOpcode, CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // loop2MBB is only needed for the compare-and-branch (min/max) variants,
  // where the store is skipped when the comparison says "keep the old value".
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // Everything after the pseudo moves to exitMBB, which inherits BB's
  // successors (and has PHIs updated accordingly).
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  // NOTE(review): on 64-bit targets RC is G8RC, yet several operations below
  // (RLWINM, SLW, LI, ORI, SRW) are 32-bit-form instructions — presumably
  // this relies on the 32-/64-bit GPR register aliasing; confirm against the
  // machine verifier.
  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                          : &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  // On little-endian the byte offset within the word is already the shift
  // amount, so no xori correction is needed and ShiftReg aliases Shift1Reg.
  unsigned ShiftReg =
    isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  // For a swap there is no arithmetic result; the shifted increment itself
  // is what gets merged into the word.
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  // Extract the in-word bit offset (byte/halfword index * 8) of the operand.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  // Big-endian: convert to a shift from the other end of the word.
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  // Clear the low two bits to form the aligned word address.
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  // Position the increment and the 0xFF/0xFFFF mask over the operand's lane.
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  // Merge: keep the other lanes of the word, replace the operand's lane.
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    unsigned SReg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
      .addReg(TmpDestReg).addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      // Signed case: shift the loaded lane back down and sign-extend it,
      // then compare against the original (unshifted) increment.
      ValueReg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
        .addReg(SReg).addReg(ShiftReg);
      unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
        .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    // If CmpPred holds, the old value wins: skip the store and exit.
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
      .addReg(CmpReg).addReg(ValueReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  // Conditional store; retry the whole loop if the reservation was lost.
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // Shift the previously-loaded word down so dest holds the old partword
  // value (the pseudo's result is the pre-operation value).
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}
9361 
9362 llvm::MachineBasicBlock *
9363 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
9364                                     MachineBasicBlock *MBB) const {
9365   DebugLoc DL = MI.getDebugLoc();
9366   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9367   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
9368 
9369   MachineFunction *MF = MBB->getParent();
9370   MachineRegisterInfo &MRI = MF->getRegInfo();
9371 
9372   const BasicBlock *BB = MBB->getBasicBlock();
9373   MachineFunction::iterator I = ++MBB->getIterator();
9374 
9375   // Memory Reference
9376   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
9377   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
9378 
9379   unsigned DstReg = MI.getOperand(0).getReg();
9380   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
9381   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
9382   unsigned mainDstReg = MRI.createVirtualRegister(RC);
9383   unsigned restoreDstReg = MRI.createVirtualRegister(RC);
9384 
9385   MVT PVT = getPointerTy(MF->getDataLayout());
9386   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
9387          "Invalid Pointer Size!");
9388   // For v = setjmp(buf), we generate
9389   //
9390   // thisMBB:
9391   //  SjLjSetup mainMBB
9392   //  bl mainMBB
9393   //  v_restore = 1
9394   //  b sinkMBB
9395   //
9396   // mainMBB:
9397   //  buf[LabelOffset] = LR
9398   //  v_main = 0
9399   //
9400   // sinkMBB:
9401   //  v = phi(main, restore)
9402   //
9403 
9404   MachineBasicBlock *thisMBB = MBB;
9405   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
9406   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
9407   MF->insert(I, mainMBB);
9408   MF->insert(I, sinkMBB);
9409 
9410   MachineInstrBuilder MIB;
9411 
9412   // Transfer the remainder of BB and its successor edges to sinkMBB.
9413   sinkMBB->splice(sinkMBB->begin(), MBB,
9414                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
9415   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
9416 
9417   // Note that the structure of the jmp_buf used here is not compatible
9418   // with that used by libc, and is not designed to be. Specifically, it
9419   // stores only those 'reserved' registers that LLVM does not otherwise
9420   // understand how to spill. Also, by convention, by the time this
9421   // intrinsic is called, Clang has already stored the frame address in the
9422   // first slot of the buffer and stack address in the third. Following the
9423   // X86 target code, we'll store the jump address in the second slot. We also
9424   // need to save the TOC pointer (R2) to handle jumps between shared
9425   // libraries, and that will be stored in the fourth slot. The thread
9426   // identifier (R13) is not affected.
9427 
9428   // thisMBB:
9429   const int64_t LabelOffset = 1 * PVT.getStoreSize();
9430   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
9431   const int64_t BPOffset    = 4 * PVT.getStoreSize();
9432 
9433   // Prepare IP either in reg.
9434   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
9435   unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
9436   unsigned BufReg = MI.getOperand(1).getReg();
9437 
9438   if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
9439     setUsesTOCBasePtr(*MBB->getParent());
9440     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
9441             .addReg(PPC::X2)
9442             .addImm(TOCOffset)
9443             .addReg(BufReg);
9444     MIB.setMemRefs(MMOBegin, MMOEnd);
9445   }
9446 
9447   // Naked functions never have a base pointer, and so we use r1. For all
9448   // other functions, this decision must be delayed until during PEI.
9449   unsigned BaseReg;
9450   if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
9451     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
9452   else
9453     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
9454 
9455   MIB = BuildMI(*thisMBB, MI, DL,
9456                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
9457             .addReg(BaseReg)
9458             .addImm(BPOffset)
9459             .addReg(BufReg);
9460   MIB.setMemRefs(MMOBegin, MMOEnd);
9461 
9462   // Setup
9463   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
9464   MIB.addRegMask(TRI->getNoPreservedMask());
9465 
9466   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
9467 
9468   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
9469           .addMBB(mainMBB);
9470   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
9471 
9472   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
9473   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
9474 
9475   // mainMBB:
9476   //  mainDstReg = 0
9477   MIB =
9478       BuildMI(mainMBB, DL,
9479               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
9480 
9481   // Store IP
9482   if (Subtarget.isPPC64()) {
9483     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
9484             .addReg(LabelReg)
9485             .addImm(LabelOffset)
9486             .addReg(BufReg);
9487   } else {
9488     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
9489             .addReg(LabelReg)
9490             .addImm(LabelOffset)
9491             .addReg(BufReg);
9492   }
9493 
9494   MIB.setMemRefs(MMOBegin, MMOEnd);
9495 
9496   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
9497   mainMBB->addSuccessor(sinkMBB);
9498 
9499   // sinkMBB:
9500   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
9501           TII->get(PPC::PHI), DstReg)
9502     .addReg(mainDstReg).addMBB(mainMBB)
9503     .addReg(restoreDstReg).addMBB(thisMBB);
9504 
9505   MI.eraseFromParent();
9506   return sinkMBB;
9507 }
9508 
9509 MachineBasicBlock *
9510 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
9511                                      MachineBasicBlock *MBB) const {
9512   DebugLoc DL = MI.getDebugLoc();
9513   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9514 
9515   MachineFunction *MF = MBB->getParent();
9516   MachineRegisterInfo &MRI = MF->getRegInfo();
9517 
9518   // Memory Reference
9519   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
9520   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
9521 
9522   MVT PVT = getPointerTy(MF->getDataLayout());
9523   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
9524          "Invalid Pointer Size!");
9525 
9526   const TargetRegisterClass *RC =
9527     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
9528   unsigned Tmp = MRI.createVirtualRegister(RC);
9529   // Since FP is only updated here but NOT referenced, it's treated as GPR.
9530   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
9531   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
9532   unsigned BP =
9533       (PVT == MVT::i64)
9534           ? PPC::X30
9535           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
9536                                                               : PPC::R30);
9537 
9538   MachineInstrBuilder MIB;
9539 
9540   const int64_t LabelOffset = 1 * PVT.getStoreSize();
9541   const int64_t SPOffset    = 2 * PVT.getStoreSize();
9542   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
9543   const int64_t BPOffset    = 4 * PVT.getStoreSize();
9544 
9545   unsigned BufReg = MI.getOperand(0).getReg();
9546 
9547   // Reload FP (the jumped-to function may not have had a
9548   // frame pointer, and if so, then its r31 will be restored
9549   // as necessary).
9550   if (PVT == MVT::i64) {
9551     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
9552             .addImm(0)
9553             .addReg(BufReg);
9554   } else {
9555     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
9556             .addImm(0)
9557             .addReg(BufReg);
9558   }
9559   MIB.setMemRefs(MMOBegin, MMOEnd);
9560 
9561   // Reload IP
9562   if (PVT == MVT::i64) {
9563     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
9564             .addImm(LabelOffset)
9565             .addReg(BufReg);
9566   } else {
9567     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
9568             .addImm(LabelOffset)
9569             .addReg(BufReg);
9570   }
9571   MIB.setMemRefs(MMOBegin, MMOEnd);
9572 
9573   // Reload SP
9574   if (PVT == MVT::i64) {
9575     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
9576             .addImm(SPOffset)
9577             .addReg(BufReg);
9578   } else {
9579     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
9580             .addImm(SPOffset)
9581             .addReg(BufReg);
9582   }
9583   MIB.setMemRefs(MMOBegin, MMOEnd);
9584 
9585   // Reload BP
9586   if (PVT == MVT::i64) {
9587     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
9588             .addImm(BPOffset)
9589             .addReg(BufReg);
9590   } else {
9591     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
9592             .addImm(BPOffset)
9593             .addReg(BufReg);
9594   }
9595   MIB.setMemRefs(MMOBegin, MMOEnd);
9596 
9597   // Reload TOC
9598   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
9599     setUsesTOCBasePtr(*MBB->getParent());
9600     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
9601             .addImm(TOCOffset)
9602             .addReg(BufReg);
9603 
9604     MIB.setMemRefs(MMOBegin, MMOEnd);
9605   }
9606 
9607   // Jump
9608   BuildMI(*MBB, MI, DL,
9609           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
9610   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
9611 
9612   MI.eraseFromParent();
9613   return MBB;
9614 }
9615 
9616 MachineBasicBlock *
9617 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9618                                                MachineBasicBlock *BB) const {
9619   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
9620       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9621     if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
9622         MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9623       // Call lowering should have added an r2 operand to indicate a dependence
9624       // on the TOC base pointer value. It can't however, because there is no
9625       // way to mark the dependence as implicit there, and so the stackmap code
9626       // will confuse it with a regular operand. Instead, add the dependence
9627       // here.
9628       setUsesTOCBasePtr(*BB->getParent());
9629       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
9630     }
9631 
9632     return emitPatchPoint(MI, BB);
9633   }
9634 
9635   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
9636       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
9637     return emitEHSjLjSetJmp(MI, BB);
9638   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
9639              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
9640     return emitEHSjLjLongJmp(MI, BB);
9641   }
9642 
9643   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9644 
9645   // To "insert" these instructions we actually have to insert their
9646   // control-flow patterns.
9647   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9648   MachineFunction::iterator It = ++BB->getIterator();
9649 
9650   MachineFunction *F = BB->getParent();
9651 
9652   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9653        MI.getOpcode() == PPC::SELECT_CC_I8 ||
9654        MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
9655     SmallVector<MachineOperand, 2> Cond;
9656     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9657         MI.getOpcode() == PPC::SELECT_CC_I8)
9658       Cond.push_back(MI.getOperand(4));
9659     else
9660       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
9661     Cond.push_back(MI.getOperand(1));
9662 
9663     DebugLoc dl = MI.getDebugLoc();
9664     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
9665                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
9666   } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9667              MI.getOpcode() == PPC::SELECT_CC_I8 ||
9668              MI.getOpcode() == PPC::SELECT_CC_F4 ||
9669              MI.getOpcode() == PPC::SELECT_CC_F8 ||
9670              MI.getOpcode() == PPC::SELECT_CC_QFRC ||
9671              MI.getOpcode() == PPC::SELECT_CC_QSRC ||
9672              MI.getOpcode() == PPC::SELECT_CC_QBRC ||
9673              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
9674              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
9675              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
9676              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
9677              MI.getOpcode() == PPC::SELECT_I4 ||
9678              MI.getOpcode() == PPC::SELECT_I8 ||
9679              MI.getOpcode() == PPC::SELECT_F4 ||
9680              MI.getOpcode() == PPC::SELECT_F8 ||
9681              MI.getOpcode() == PPC::SELECT_QFRC ||
9682              MI.getOpcode() == PPC::SELECT_QSRC ||
9683              MI.getOpcode() == PPC::SELECT_QBRC ||
9684              MI.getOpcode() == PPC::SELECT_VRRC ||
9685              MI.getOpcode() == PPC::SELECT_VSFRC ||
9686              MI.getOpcode() == PPC::SELECT_VSSRC ||
9687              MI.getOpcode() == PPC::SELECT_VSRC) {
9688     // The incoming instruction knows the destination vreg to set, the
9689     // condition code register to branch on, the true/false values to
9690     // select between, and a branch opcode to use.
9691 
9692     //  thisMBB:
9693     //  ...
9694     //   TrueVal = ...
9695     //   cmpTY ccX, r1, r2
9696     //   bCC copy1MBB
9697     //   fallthrough --> copy0MBB
9698     MachineBasicBlock *thisMBB = BB;
9699     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9700     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9701     DebugLoc dl = MI.getDebugLoc();
9702     F->insert(It, copy0MBB);
9703     F->insert(It, sinkMBB);
9704 
9705     // Transfer the remainder of BB and its successor edges to sinkMBB.
9706     sinkMBB->splice(sinkMBB->begin(), BB,
9707                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9708     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9709 
9710     // Next, add the true and fallthrough blocks as its successors.
9711     BB->addSuccessor(copy0MBB);
9712     BB->addSuccessor(sinkMBB);
9713 
9714     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
9715         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
9716         MI.getOpcode() == PPC::SELECT_QFRC ||
9717         MI.getOpcode() == PPC::SELECT_QSRC ||
9718         MI.getOpcode() == PPC::SELECT_QBRC ||
9719         MI.getOpcode() == PPC::SELECT_VRRC ||
9720         MI.getOpcode() == PPC::SELECT_VSFRC ||
9721         MI.getOpcode() == PPC::SELECT_VSSRC ||
9722         MI.getOpcode() == PPC::SELECT_VSRC) {
9723       BuildMI(BB, dl, TII->get(PPC::BC))
9724           .addReg(MI.getOperand(1).getReg())
9725           .addMBB(sinkMBB);
9726     } else {
9727       unsigned SelectPred = MI.getOperand(4).getImm();
9728       BuildMI(BB, dl, TII->get(PPC::BCC))
9729           .addImm(SelectPred)
9730           .addReg(MI.getOperand(1).getReg())
9731           .addMBB(sinkMBB);
9732     }
9733 
9734     //  copy0MBB:
9735     //   %FalseValue = ...
9736     //   # fallthrough to sinkMBB
9737     BB = copy0MBB;
9738 
9739     // Update machine-CFG edges
9740     BB->addSuccessor(sinkMBB);
9741 
9742     //  sinkMBB:
9743     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9744     //  ...
9745     BB = sinkMBB;
9746     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
9747         .addReg(MI.getOperand(3).getReg())
9748         .addMBB(copy0MBB)
9749         .addReg(MI.getOperand(2).getReg())
9750         .addMBB(thisMBB);
9751   } else if (MI.getOpcode() == PPC::ReadTB) {
9752     // To read the 64-bit time-base register on a 32-bit target, we read the
9753     // two halves. Should the counter have wrapped while it was being read, we
9754     // need to try again.
9755     // ...
9756     // readLoop:
9757     // mfspr Rx,TBU # load from TBU
9758     // mfspr Ry,TB  # load from TB
9759     // mfspr Rz,TBU # load from TBU
9760     // cmpw crX,Rx,Rz # check if 'old'='new'
9761     // bne readLoop   # branch if they're not equal
9762     // ...
9763 
9764     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
9765     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9766     DebugLoc dl = MI.getDebugLoc();
9767     F->insert(It, readMBB);
9768     F->insert(It, sinkMBB);
9769 
9770     // Transfer the remainder of BB and its successor edges to sinkMBB.
9771     sinkMBB->splice(sinkMBB->begin(), BB,
9772                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9773     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9774 
9775     BB->addSuccessor(readMBB);
9776     BB = readMBB;
9777 
9778     MachineRegisterInfo &RegInfo = F->getRegInfo();
9779     unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9780     unsigned LoReg = MI.getOperand(0).getReg();
9781     unsigned HiReg = MI.getOperand(1).getReg();
9782 
9783     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
9784     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
9785     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
9786 
9787     unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9788 
9789     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
9790       .addReg(HiReg).addReg(ReadAgainReg);
9791     BuildMI(BB, dl, TII->get(PPC::BCC))
9792       .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
9793 
9794     BB->addSuccessor(readMBB);
9795     BB->addSuccessor(sinkMBB);
9796   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9797     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9798   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9799     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9800   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9801     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9802   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9803     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9804 
9805   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9806     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9807   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9808     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9809   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9810     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9811   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9812     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9813 
9814   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9815     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9816   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9817     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9818   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9819     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9820   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9821     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9822 
9823   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9824     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9825   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9826     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9827   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9828     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9829   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9830     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9831 
9832   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9833     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9834   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9835     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9836   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9837     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9838   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9839     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9840 
9841   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9842     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9843   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9844     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9845   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9846     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9847   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9848     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9849 
9850   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
9851     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
9852   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
9853     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
9854   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
9855     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
9856   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
9857     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
9858 
9859   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
9860     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
9861   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
9862     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
9863   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
9864     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
9865   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
9866     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
9867 
9868   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
9869     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
9870   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
9871     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
9872   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
9873     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
9874   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
9875     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
9876 
9877   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
9878     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
9879   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
9880     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
9881   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
9882     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
9883   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
9884     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
9885 
9886   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9887     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9888   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9889     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9890   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9891     BB = EmitAtomicBinary(MI, BB, 4, 0);
9892   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9893     BB = EmitAtomicBinary(MI, BB, 8, 0);
9894   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9895            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9896            (Subtarget.hasPartwordAtomics() &&
9897             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9898            (Subtarget.hasPartwordAtomics() &&
9899             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9900     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
9901 
9902     auto LoadMnemonic = PPC::LDARX;
9903     auto StoreMnemonic = PPC::STDCX;
9904     switch (MI.getOpcode()) {
9905     default:
9906       llvm_unreachable("Compare and swap of unknown size");
9907     case PPC::ATOMIC_CMP_SWAP_I8:
9908       LoadMnemonic = PPC::LBARX;
9909       StoreMnemonic = PPC::STBCX;
9910       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
9911       break;
9912     case PPC::ATOMIC_CMP_SWAP_I16:
9913       LoadMnemonic = PPC::LHARX;
9914       StoreMnemonic = PPC::STHCX;
9915       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
9916       break;
9917     case PPC::ATOMIC_CMP_SWAP_I32:
9918       LoadMnemonic = PPC::LWARX;
9919       StoreMnemonic = PPC::STWCX;
9920       break;
9921     case PPC::ATOMIC_CMP_SWAP_I64:
9922       LoadMnemonic = PPC::LDARX;
9923       StoreMnemonic = PPC::STDCX;
9924       break;
9925     }
9926     unsigned dest = MI.getOperand(0).getReg();
9927     unsigned ptrA = MI.getOperand(1).getReg();
9928     unsigned ptrB = MI.getOperand(2).getReg();
9929     unsigned oldval = MI.getOperand(3).getReg();
9930     unsigned newval = MI.getOperand(4).getReg();
9931     DebugLoc dl = MI.getDebugLoc();
9932 
9933     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9934     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9935     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9936     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9937     F->insert(It, loop1MBB);
9938     F->insert(It, loop2MBB);
9939     F->insert(It, midMBB);
9940     F->insert(It, exitMBB);
9941     exitMBB->splice(exitMBB->begin(), BB,
9942                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9943     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9944 
9945     //  thisMBB:
9946     //   ...
9947     //   fallthrough --> loopMBB
9948     BB->addSuccessor(loop1MBB);
9949 
9950     // loop1MBB:
9951     //   l[bhwd]arx dest, ptr
9952     //   cmp[wd] dest, oldval
9953     //   bne- midMBB
9954     // loop2MBB:
9955     //   st[bhwd]cx. newval, ptr
9956     //   bne- loopMBB
9957     //   b exitBB
9958     // midMBB:
9959     //   st[bhwd]cx. dest, ptr
9960     // exitBB:
9961     BB = loop1MBB;
9962     BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9963       .addReg(ptrA).addReg(ptrB);
9964     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
9965       .addReg(oldval).addReg(dest);
9966     BuildMI(BB, dl, TII->get(PPC::BCC))
9967       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9968     BB->addSuccessor(loop2MBB);
9969     BB->addSuccessor(midMBB);
9970 
9971     BB = loop2MBB;
9972     BuildMI(BB, dl, TII->get(StoreMnemonic))
9973       .addReg(newval).addReg(ptrA).addReg(ptrB);
9974     BuildMI(BB, dl, TII->get(PPC::BCC))
9975       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9976     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9977     BB->addSuccessor(loop1MBB);
9978     BB->addSuccessor(exitMBB);
9979 
9980     BB = midMBB;
9981     BuildMI(BB, dl, TII->get(StoreMnemonic))
9982       .addReg(dest).addReg(ptrA).addReg(ptrB);
9983     BB->addSuccessor(exitMBB);
9984 
9985     //  exitMBB:
9986     //   ...
9987     BB = exitMBB;
9988   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
9989              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
9990     // We must use 64-bit registers for addresses when targeting 64-bit,
9991     // since we're actually doing arithmetic on them.  Other registers
9992     // can be 32-bit.
9993     bool is64bit = Subtarget.isPPC64();
9994     bool isLittleEndian = Subtarget.isLittleEndian();
9995     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
9996 
9997     unsigned dest = MI.getOperand(0).getReg();
9998     unsigned ptrA = MI.getOperand(1).getReg();
9999     unsigned ptrB = MI.getOperand(2).getReg();
10000     unsigned oldval = MI.getOperand(3).getReg();
10001     unsigned newval = MI.getOperand(4).getReg();
10002     DebugLoc dl = MI.getDebugLoc();
10003 
10004     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10005     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10006     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10007     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10008     F->insert(It, loop1MBB);
10009     F->insert(It, loop2MBB);
10010     F->insert(It, midMBB);
10011     F->insert(It, exitMBB);
10012     exitMBB->splice(exitMBB->begin(), BB,
10013                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
10014     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10015 
10016     MachineRegisterInfo &RegInfo = F->getRegInfo();
10017     const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
10018                                             : &PPC::GPRCRegClass;
10019     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
10020     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
10021     unsigned ShiftReg =
10022       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
10023     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
10024     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
10025     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
10026     unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
10027     unsigned MaskReg = RegInfo.createVirtualRegister(RC);
10028     unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
10029     unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
10030     unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
10031     unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
10032     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
10033     unsigned Ptr1Reg;
10034     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
10035     unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10036     //  thisMBB:
10037     //   ...
10038     //   fallthrough --> loopMBB
10039     BB->addSuccessor(loop1MBB);
10040 
10041     // The 4-byte load must be aligned, while a char or short may be
10042     // anywhere in the word.  Hence all this nasty bookkeeping code.
10043     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
10044     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10045     //   xori shift, shift1, 24 [16]
10046     //   rlwinm ptr, ptr1, 0, 0, 29
10047     //   slw newval2, newval, shift
10048     //   slw oldval2, oldval,shift
10049     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10050     //   slw mask, mask2, shift
10051     //   and newval3, newval2, mask
10052     //   and oldval3, oldval2, mask
10053     // loop1MBB:
10054     //   lwarx tmpDest, ptr
10055     //   and tmp, tmpDest, mask
10056     //   cmpw tmp, oldval3
10057     //   bne- midMBB
10058     // loop2MBB:
10059     //   andc tmp2, tmpDest, mask
10060     //   or tmp4, tmp2, newval3
10061     //   stwcx. tmp4, ptr
10062     //   bne- loop1MBB
10063     //   b exitBB
10064     // midMBB:
10065     //   stwcx. tmpDest, ptr
10066     // exitBB:
10067     //   srw dest, tmpDest, shift
10068     if (ptrA != ZeroReg) {
10069       Ptr1Reg = RegInfo.createVirtualRegister(RC);
10070       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10071         .addReg(ptrA).addReg(ptrB);
10072     } else {
10073       Ptr1Reg = ptrB;
10074     }
10075     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
10076         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
10077     if (!isLittleEndian)
10078       BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
10079           .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
10080     if (is64bit)
10081       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10082         .addReg(Ptr1Reg).addImm(0).addImm(61);
10083     else
10084       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10085         .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
10086     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
10087         .addReg(newval).addReg(ShiftReg);
10088     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
10089         .addReg(oldval).addReg(ShiftReg);
10090     if (is8bit)
10091       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10092     else {
10093       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10094       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10095         .addReg(Mask3Reg).addImm(65535);
10096     }
10097     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10098         .addReg(Mask2Reg).addReg(ShiftReg);
10099     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
10100         .addReg(NewVal2Reg).addReg(MaskReg);
10101     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
10102         .addReg(OldVal2Reg).addReg(MaskReg);
10103 
10104     BB = loop1MBB;
10105     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10106         .addReg(ZeroReg).addReg(PtrReg);
10107     BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
10108         .addReg(TmpDestReg).addReg(MaskReg);
10109     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
10110         .addReg(TmpReg).addReg(OldVal3Reg);
10111     BuildMI(BB, dl, TII->get(PPC::BCC))
10112         .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
10113     BB->addSuccessor(loop2MBB);
10114     BB->addSuccessor(midMBB);
10115 
10116     BB = loop2MBB;
10117     BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
10118         .addReg(TmpDestReg).addReg(MaskReg);
10119     BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
10120         .addReg(Tmp2Reg).addReg(NewVal3Reg);
10121     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
10122         .addReg(ZeroReg).addReg(PtrReg);
10123     BuildMI(BB, dl, TII->get(PPC::BCC))
10124       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
10125     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
10126     BB->addSuccessor(loop1MBB);
10127     BB->addSuccessor(exitMBB);
10128 
10129     BB = midMBB;
10130     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
10131       .addReg(ZeroReg).addReg(PtrReg);
10132     BB->addSuccessor(exitMBB);
10133 
10134     //  exitMBB:
10135     //   ...
10136     BB = exitMBB;
10137     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
10138       .addReg(ShiftReg);
10139   } else if (MI.getOpcode() == PPC::FADDrtz) {
10140     // This pseudo performs an FADD with rounding mode temporarily forced
10141     // to round-to-zero.  We emit this via custom inserter since the FPSCR
10142     // is not modeled at the SelectionDAG level.
10143     unsigned Dest = MI.getOperand(0).getReg();
10144     unsigned Src1 = MI.getOperand(1).getReg();
10145     unsigned Src2 = MI.getOperand(2).getReg();
10146     DebugLoc dl = MI.getDebugLoc();
10147 
10148     MachineRegisterInfo &RegInfo = F->getRegInfo();
10149     unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
10150 
10151     // Save FPSCR value.
10152     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
10153 
10154     // Set rounding mode to round-to-zero.
10155     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
10156     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
10157 
10158     // Perform addition.
10159     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
10160 
10161     // Restore FPSCR value.
10162     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
10163   } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10164              MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
10165              MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10166              MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
10167     unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10168                        MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
10169                           ? PPC::ANDIo8
10170                           : PPC::ANDIo;
10171     bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10172                  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
10173 
10174     MachineRegisterInfo &RegInfo = F->getRegInfo();
10175     unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
10176                                                   &PPC::GPRCRegClass :
10177                                                   &PPC::G8RCRegClass);
10178 
10179     DebugLoc dl = MI.getDebugLoc();
10180     BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
10181         .addReg(MI.getOperand(1).getReg())
10182         .addImm(1);
10183     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
10184             MI.getOperand(0).getReg())
10185         .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
10186   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
10187     DebugLoc Dl = MI.getDebugLoc();
10188     MachineRegisterInfo &RegInfo = F->getRegInfo();
10189     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10190     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
10191     return BB;
10192   } else {
10193     llvm_unreachable("Unexpected instr type to insert");
10194   }
10195 
10196   MI.eraseFromParent(); // The pseudo instruction is gone now.
10197   return BB;
10198 }
10199 
10200 //===----------------------------------------------------------------------===//
10201 // Target Optimization Hooks
10202 //===----------------------------------------------------------------------===//
10203 
10204 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
10205   // For the estimates, convergence is quadratic, so we essentially double the
10206   // number of digits correct after every iteration. For both FRE and FRSQRTE,
10207   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
10208   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
10209   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
10210   if (VT.getScalarType() == MVT::f64)
10211     RefinementSteps++;
10212   return RefinementSteps;
10213 }
10214 
10215 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
10216                                            int Enabled, int &RefinementSteps,
10217                                            bool &UseOneConstNR,
10218                                            bool Reciprocal) const {
10219   EVT VT = Operand.getValueType();
10220   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
10221       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
10222       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10223       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10224       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10225       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10226     if (RefinementSteps == ReciprocalEstimate::Unspecified)
10227       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10228 
10229     UseOneConstNR = true;
10230     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
10231   }
10232   return SDValue();
10233 }
10234 
10235 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
10236                                             int Enabled,
10237                                             int &RefinementSteps) const {
10238   EVT VT = Operand.getValueType();
10239   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
10240       (VT == MVT::f64 && Subtarget.hasFRE()) ||
10241       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10242       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10243       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10244       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10245     if (RefinementSteps == ReciprocalEstimate::Unspecified)
10246       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10247     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
10248   }
10249   return SDValue();
10250 }
10251 
10252 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
10253   // Note: This functionality is used only when unsafe-fp-math is enabled, and
10254   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
10255   // enabled for division), this functionality is redundant with the default
10256   // combiner logic (once the division -> reciprocal/multiply transformation
10257   // has taken place). As a result, this matters more for older cores than for
10258   // newer ones.
10259 
10260   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
10261   // reciprocal if there are two or more FDIVs (for embedded cores with only
10262   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
10263   switch (Subtarget.getDarwinDirective()) {
10264   default:
10265     return 3;
10266   case PPC::DIR_440:
10267   case PPC::DIR_A2:
10268   case PPC::DIR_E500mc:
10269   case PPC::DIR_E5500:
10270     return 2;
10271   }
10272 }
10273 
10274 // isConsecutiveLSLoc needs to work even if all adds have not yet been
10275 // collapsed, and so we need to look through chains of them.
10276 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
10277                                      int64_t& Offset, SelectionDAG &DAG) {
10278   if (DAG.isBaseWithConstantOffset(Loc)) {
10279     Base = Loc.getOperand(0);
10280     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
10281 
10282     // The base might itself be a base plus an offset, and if so, accumulate
10283     // that as well.
10284     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
10285   }
10286 }
10287 
10288 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
10289                             unsigned Bytes, int Dist,
10290                             SelectionDAG &DAG) {
10291   if (VT.getSizeInBits() / 8 != Bytes)
10292     return false;
10293 
10294   SDValue BaseLoc = Base->getBasePtr();
10295   if (Loc.getOpcode() == ISD::FrameIndex) {
10296     if (BaseLoc.getOpcode() != ISD::FrameIndex)
10297       return false;
10298     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10299     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
10300     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
10301     int FS  = MFI.getObjectSize(FI);
10302     int BFS = MFI.getObjectSize(BFI);
10303     if (FS != BFS || FS != (int)Bytes) return false;
10304     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
10305   }
10306 
10307   SDValue Base1 = Loc, Base2 = BaseLoc;
10308   int64_t Offset1 = 0, Offset2 = 0;
10309   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
10310   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
10311   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
10312     return true;
10313 
10314   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10315   const GlobalValue *GV1 = nullptr;
10316   const GlobalValue *GV2 = nullptr;
10317   Offset1 = 0;
10318   Offset2 = 0;
10319   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
10320   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
10321   if (isGA1 && isGA2 && GV1 == GV2)
10322     return Offset1 == (Offset2 + Dist*Bytes);
10323   return false;
10324 }
10325 
10326 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
10327 // not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  // Ordinary loads/stores carry their memory VT and base pointer directly.
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  // Target load intrinsics: recover the memory VT from the intrinsic ID
  // (operand 1); the address is operand 2.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  // Target store intrinsics: same idea, but here the address is taken from
  // operand 3 rather than operand 2.
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  // Anything else cannot be reasoned about here.
  return false;
}
10435 
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
10441 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
10442   SDValue Chain = LD->getChain();
10443   EVT VT = LD->getMemoryVT();
10444 
10445   SmallSet<SDNode *, 16> LoadRoots;
10446   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
10447   SmallSet<SDNode *, 16> Visited;
10448 
10449   // First, search up the chain, branching to follow all token-factor operands.
10450   // If we find a consecutive load, then we're done, otherwise, record all
10451   // nodes just above the top-level loads and token factors.
10452   while (!Queue.empty()) {
10453     SDNode *ChainNext = Queue.pop_back_val();
10454     if (!Visited.insert(ChainNext).second)
10455       continue;
10456 
10457     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
10458       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10459         return true;
10460 
10461       if (!Visited.count(ChainLD->getChain().getNode()))
10462         Queue.push_back(ChainLD->getChain().getNode());
10463     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
10464       for (const SDUse &O : ChainNext->ops())
10465         if (!Visited.count(O.getNode()))
10466           Queue.push_back(O.getNode());
10467     } else
10468       LoadRoots.insert(ChainNext);
10469   }
10470 
10471   // Second, search down the chain, starting from the top-level nodes recorded
10472   // in the first phase. These top-level nodes are the nodes just above all
10473   // loads and token factors. Starting with their uses, recursively look though
10474   // all loads (just the chain uses) and token factors to find a consecutive
10475   // load.
10476   Visited.clear();
10477   Queue.clear();
10478 
10479   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
10480        IE = LoadRoots.end(); I != IE; ++I) {
10481     Queue.push_back(*I);
10482 
10483     while (!Queue.empty()) {
10484       SDNode *LoadRoot = Queue.pop_back_val();
10485       if (!Visited.insert(LoadRoot).second)
10486         continue;
10487 
10488       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
10489         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10490           return true;
10491 
10492       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
10493            UE = LoadRoot->use_end(); UI != UE; ++UI)
10494         if (((isa<MemSDNode>(*UI) &&
10495             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
10496             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
10497           Queue.push_back(*UI);
10498     }
10499   }
10500 
10501   return false;
10502 }
10503 
10504 /// This function is called when we have proved that a SETCC node can be replaced
10505 /// by subtraction (and other supporting instructions) so that the result of
10506 /// comparison is kept in a GPR instead of CR. This function is purely for
10507 /// codegen purposes and has some flags to guide the codegen process.
10508 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
10509                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
10510   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10511 
10512   // Zero extend the operands to the largest legal integer. Originally, they
10513   // must be of a strictly smaller size.
10514   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
10515                          DAG.getConstant(Size, DL, MVT::i32));
10516   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
10517                          DAG.getConstant(Size, DL, MVT::i32));
10518 
10519   // Swap if needed. Depends on the condition code.
10520   if (Swap)
10521     std::swap(Op0, Op1);
10522 
10523   // Subtract extended integers.
10524   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
10525 
10526   // Move the sign bit to the least significant position and zero out the rest.
10527   // Now the least significant bit carries the result of original comparison.
10528   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
10529                              DAG.getConstant(Size - 1, DL, MVT::i32));
10530   auto Final = Shifted;
10531 
10532   // Complement the result if needed. Based on the condition code.
10533   if (Complement)
10534     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
10535                         DAG.getConstant(1, DL, MVT::i64));
10536 
10537   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
10538 }
10539 
10540 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
10541                                                   DAGCombinerInfo &DCI) const {
10542   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10543 
10544   SelectionDAG &DAG = DCI.DAG;
10545   SDLoc DL(N);
10546 
10547   // Size of integers being compared has a critical role in the following
10548   // analysis, so we prefer to do this when all types are legal.
10549   if (!DCI.isAfterLegalizeVectorOps())
10550     return SDValue();
10551 
10552   // If all users of SETCC extend its value to a legal integer type
10553   // then we replace SETCC with a subtraction
10554   for (SDNode::use_iterator UI = N->use_begin(),
10555        UE = N->use_end(); UI != UE; ++UI) {
10556     if (UI->getOpcode() != ISD::ZERO_EXTEND)
10557       return SDValue();
10558   }
10559 
10560   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
10561   auto OpSize = N->getOperand(0).getValueSizeInBits();
10562 
10563   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
10564 
10565   if (OpSize < Size) {
10566     switch (CC) {
10567     default: break;
10568     case ISD::SETULT:
10569       return generateEquivalentSub(N, Size, false, false, DL, DAG);
10570     case ISD::SETULE:
10571       return generateEquivalentSub(N, Size, true, true, DL, DAG);
10572     case ISD::SETUGT:
10573       return generateEquivalentSub(N, Size, false, true, DL, DAG);
10574     case ISD::SETUGE:
10575       return generateEquivalentSub(N, Size, true, false, DL, DAG);
10576     }
10577   }
10578 
10579   return SDValue();
10580 }
10581 
10582 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
10583                                                   DAGCombinerInfo &DCI) const {
10584   SelectionDAG &DAG = DCI.DAG;
10585   SDLoc dl(N);
10586 
10587   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
10588   // If we're tracking CR bits, we need to be careful that we don't have:
10589   //   trunc(binary-ops(zext(x), zext(y)))
10590   // or
10591   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
10592   // such that we're unnecessarily moving things into GPRs when it would be
10593   // better to keep them in CR bits.
10594 
10595   // Note that trunc here can be an actual i1 trunc, or can be the effective
10596   // truncation that comes from a setcc or select_cc.
10597   if (N->getOpcode() == ISD::TRUNCATE &&
10598       N->getValueType(0) != MVT::i1)
10599     return SDValue();
10600 
10601   if (N->getOperand(0).getValueType() != MVT::i32 &&
10602       N->getOperand(0).getValueType() != MVT::i64)
10603     return SDValue();
10604 
10605   if (N->getOpcode() == ISD::SETCC ||
10606       N->getOpcode() == ISD::SELECT_CC) {
10607     // If we're looking at a comparison, then we need to make sure that the
10608     // high bits (all except for the first) don't matter the result.
10609     ISD::CondCode CC =
10610       cast<CondCodeSDNode>(N->getOperand(
10611         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
10612     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
10613 
10614     if (ISD::isSignedIntSetCC(CC)) {
10615       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
10616           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
10617         return SDValue();
10618     } else if (ISD::isUnsignedIntSetCC(CC)) {
10619       if (!DAG.MaskedValueIsZero(N->getOperand(0),
10620                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
10621           !DAG.MaskedValueIsZero(N->getOperand(1),
10622                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
10623         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
10624                                              : SDValue());
10625     } else {
10626       // This is neither a signed nor an unsigned comparison, just make sure
10627       // that the high bits are equal.
10628       KnownBits Op1Known, Op2Known;
10629       DAG.computeKnownBits(N->getOperand(0), Op1Known);
10630       DAG.computeKnownBits(N->getOperand(1), Op2Known);
10631 
10632       // We don't really care about what is known about the first bit (if
10633       // anything), so clear it in all masks prior to comparing them.
10634       Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
10635       Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
10636 
10637       if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
10638         return SDValue();
10639     }
10640   }
10641 
10642   // We now know that the higher-order bits are irrelevant, we just need to
10643   // make sure that all of the intermediate operations are bit operations, and
10644   // all inputs are extensions.
10645   if (N->getOperand(0).getOpcode() != ISD::AND &&
10646       N->getOperand(0).getOpcode() != ISD::OR  &&
10647       N->getOperand(0).getOpcode() != ISD::XOR &&
10648       N->getOperand(0).getOpcode() != ISD::SELECT &&
10649       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
10650       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
10651       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
10652       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
10653       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
10654     return SDValue();
10655 
10656   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
10657       N->getOperand(1).getOpcode() != ISD::AND &&
10658       N->getOperand(1).getOpcode() != ISD::OR  &&
10659       N->getOperand(1).getOpcode() != ISD::XOR &&
10660       N->getOperand(1).getOpcode() != ISD::SELECT &&
10661       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
10662       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
10663       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
10664       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
10665       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
10666     return SDValue();
10667 
10668   SmallVector<SDValue, 4> Inputs;
10669   SmallVector<SDValue, 8> BinOps, PromOps;
10670   SmallPtrSet<SDNode *, 16> Visited;
10671 
10672   for (unsigned i = 0; i < 2; ++i) {
10673     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10674           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10675           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10676           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10677         isa<ConstantSDNode>(N->getOperand(i)))
10678       Inputs.push_back(N->getOperand(i));
10679     else
10680       BinOps.push_back(N->getOperand(i));
10681 
10682     if (N->getOpcode() == ISD::TRUNCATE)
10683       break;
10684   }
10685 
10686   // Visit all inputs, collect all binary operations (and, or, xor and
10687   // select) that are all fed by extensions.
10688   while (!BinOps.empty()) {
10689     SDValue BinOp = BinOps.back();
10690     BinOps.pop_back();
10691 
10692     if (!Visited.insert(BinOp.getNode()).second)
10693       continue;
10694 
10695     PromOps.push_back(BinOp);
10696 
10697     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10698       // The condition of the select is not promoted.
10699       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10700         continue;
10701       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10702         continue;
10703 
10704       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10705             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10706             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10707            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10708           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10709         Inputs.push_back(BinOp.getOperand(i));
10710       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10711                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10712                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10713                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10714                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
10715                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10716                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10717                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10718                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
10719         BinOps.push_back(BinOp.getOperand(i));
10720       } else {
10721         // We have an input that is not an extension or another binary
10722         // operation; we'll abort this transformation.
10723         return SDValue();
10724       }
10725     }
10726   }
10727 
10728   // Make sure that this is a self-contained cluster of operations (which
10729   // is not quite the same thing as saying that everything has only one
10730   // use).
10731   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10732     if (isa<ConstantSDNode>(Inputs[i]))
10733       continue;
10734 
10735     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10736                               UE = Inputs[i].getNode()->use_end();
10737          UI != UE; ++UI) {
10738       SDNode *User = *UI;
10739       if (User != N && !Visited.count(User))
10740         return SDValue();
10741 
10742       // Make sure that we're not going to promote the non-output-value
10743       // operand(s) or SELECT or SELECT_CC.
10744       // FIXME: Although we could sometimes handle this, and it does occur in
10745       // practice that one of the condition inputs to the select is also one of
10746       // the outputs, we currently can't deal with this.
10747       if (User->getOpcode() == ISD::SELECT) {
10748         if (User->getOperand(0) == Inputs[i])
10749           return SDValue();
10750       } else if (User->getOpcode() == ISD::SELECT_CC) {
10751         if (User->getOperand(0) == Inputs[i] ||
10752             User->getOperand(1) == Inputs[i])
10753           return SDValue();
10754       }
10755     }
10756   }
10757 
10758   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10759     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10760                               UE = PromOps[i].getNode()->use_end();
10761          UI != UE; ++UI) {
10762       SDNode *User = *UI;
10763       if (User != N && !Visited.count(User))
10764         return SDValue();
10765 
10766       // Make sure that we're not going to promote the non-output-value
10767       // operand(s) or SELECT or SELECT_CC.
10768       // FIXME: Although we could sometimes handle this, and it does occur in
10769       // practice that one of the condition inputs to the select is also one of
10770       // the outputs, we currently can't deal with this.
10771       if (User->getOpcode() == ISD::SELECT) {
10772         if (User->getOperand(0) == PromOps[i])
10773           return SDValue();
10774       } else if (User->getOpcode() == ISD::SELECT_CC) {
10775         if (User->getOperand(0) == PromOps[i] ||
10776             User->getOperand(1) == PromOps[i])
10777           return SDValue();
10778       }
10779     }
10780   }
10781 
10782   // Replace all inputs with the extension operand.
10783   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10784     // Constants may have users outside the cluster of to-be-promoted nodes,
10785     // and so we need to replace those as we do the promotions.
10786     if (isa<ConstantSDNode>(Inputs[i]))
10787       continue;
10788     else
10789       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
10790   }
10791 
10792   std::list<HandleSDNode> PromOpHandles;
10793   for (auto &PromOp : PromOps)
10794     PromOpHandles.emplace_back(PromOp);
10795 
10796   // Replace all operations (these are all the same, but have a different
10797   // (i1) return type). DAG.getNode will validate that the types of
10798   // a binary operator match, so go through the list in reverse so that
10799   // we've likely promoted both operands first. Any intermediate truncations or
10800   // extensions disappear.
10801   while (!PromOpHandles.empty()) {
10802     SDValue PromOp = PromOpHandles.back().getValue();
10803     PromOpHandles.pop_back();
10804 
10805     if (PromOp.getOpcode() == ISD::TRUNCATE ||
10806         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
10807         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
10808         PromOp.getOpcode() == ISD::ANY_EXTEND) {
10809       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
10810           PromOp.getOperand(0).getValueType() != MVT::i1) {
10811         // The operand is not yet ready (see comment below).
10812         PromOpHandles.emplace_front(PromOp);
10813         continue;
10814       }
10815 
10816       SDValue RepValue = PromOp.getOperand(0);
10817       if (isa<ConstantSDNode>(RepValue))
10818         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
10819 
10820       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
10821       continue;
10822     }
10823 
10824     unsigned C;
10825     switch (PromOp.getOpcode()) {
10826     default:             C = 0; break;
10827     case ISD::SELECT:    C = 1; break;
10828     case ISD::SELECT_CC: C = 2; break;
10829     }
10830 
10831     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10832          PromOp.getOperand(C).getValueType() != MVT::i1) ||
10833         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10834          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
10835       // The to-be-promoted operands of this node have not yet been
10836       // promoted (this should be rare because we're going through the
10837       // list backward, but if one of the operands has several users in
10838       // this cluster of to-be-promoted nodes, it is possible).
10839       PromOpHandles.emplace_front(PromOp);
10840       continue;
10841     }
10842 
10843     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10844                                 PromOp.getNode()->op_end());
10845 
10846     // If there are any constant inputs, make sure they're replaced now.
10847     for (unsigned i = 0; i < 2; ++i)
10848       if (isa<ConstantSDNode>(Ops[C+i]))
10849         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
10850 
10851     DAG.ReplaceAllUsesOfValueWith(PromOp,
10852       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
10853   }
10854 
10855   // Now we're left with the initial truncation itself.
10856   if (N->getOpcode() == ISD::TRUNCATE)
10857     return N->getOperand(0);
10858 
10859   // Otherwise, this is a comparison. The operands to be compared have just
10860   // changed type (to i1), but everything else is the same.
10861   return SDValue(N, 0);
10862 }
10863 
/// \brief Combine for SIGN_EXTEND/ZERO_EXTEND/ANY_EXTEND of i1 (when CR bits
/// are in use) or of i32 on PPC64: find the self-contained cluster of logical
/// and select operations (fed by truncations and constants) that produces the
/// extended value, and promote the whole cluster to the extension's result
/// type so the intermediate truncations and the extension itself disappear.
/// Returns the replacement value, or an empty SDValue if no combine applies.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  // Only i32 and i64 extension results are handled.
  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  // The source must be either an i1 (with CR-bit tracking enabled) or an
  // i32 on a 64-bit target; these are the two profitable promotions.
  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  // The extension must be fed by one of the operations we know how to
  // promote; otherwise there is no cluster to rewrite.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        // Truncations and constants are the leaves of the cluster.
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      // Any user outside the visited cluster (other than N itself) means the
      // cluster is not self-contained; promoting would break that user.
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  // Same self-containment check for the intermediate (to-be-promoted) nodes.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  // Width of the (not-yet-extended) input value.
  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      // For zext, the high bits above PromBits must already be zero; for
      // sext, there must be enough known sign bits. Otherwise we must mask
      // or shift at the end (ReallyNeedsExt).
      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Track the to-be-promoted nodes through handles so they stay valid across
  // the ReplaceAllUsesOfValueWith calls below.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first to-be-promoted value operand: selects
    // carry a condition (and select_cc a pair of compared values) first.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  // Sign extend via shift-left then arithmetic shift-right by the number of
  // newly materialized high bits.
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}
11146 
11147 /// \brief Reduces the number of fp-to-int conversion when building a vector.
11148 ///
11149 /// If this vector is built out of floating to integer conversions,
11150 /// transform it to a vector built out of floating point values followed by a
11151 /// single floating to integer conversion of the vector.
11152 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
11153 /// becomes (fptosi (build_vector ($A, $B, ...)))
11154 SDValue PPCTargetLowering::
11155 combineElementTruncationToVectorTruncation(SDNode *N,
11156                                            DAGCombinerInfo &DCI) const {
11157   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11158          "Should be called with a BUILD_VECTOR node");
11159 
11160   SelectionDAG &DAG = DCI.DAG;
11161   SDLoc dl(N);
11162 
11163   SDValue FirstInput = N->getOperand(0);
11164   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
11165          "The input operand must be an fp-to-int conversion.");
11166 
11167   // This combine happens after legalization so the fp_to_[su]i nodes are
11168   // already converted to PPCSISD nodes.
11169   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
11170   if (FirstConversion == PPCISD::FCTIDZ ||
11171       FirstConversion == PPCISD::FCTIDUZ ||
11172       FirstConversion == PPCISD::FCTIWZ ||
11173       FirstConversion == PPCISD::FCTIWUZ) {
11174     bool IsSplat = true;
11175     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
11176       FirstConversion == PPCISD::FCTIWUZ;
11177     EVT SrcVT = FirstInput.getOperand(0).getValueType();
11178     SmallVector<SDValue, 4> Ops;
11179     EVT TargetVT = N->getValueType(0);
11180     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
11181       if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
11182         return SDValue();
11183       unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
11184       if (NextConversion != FirstConversion)
11185         return SDValue();
11186       if (N->getOperand(i) != FirstInput)
11187         IsSplat = false;
11188     }
11189 
11190     // If this is a splat, we leave it as-is since there will be only a single
11191     // fp-to-int conversion followed by a splat of the integer. This is better
11192     // for 32-bit and smaller ints and neutral for 64-bit ints.
11193     if (IsSplat)
11194       return SDValue();
11195 
11196     // Now that we know we have the right type of node, get its operands
11197     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
11198       SDValue In = N->getOperand(i).getOperand(0);
11199       // For 32-bit values, we need to add an FP_ROUND node.
11200       if (Is32Bit) {
11201         if (In.isUndef())
11202           Ops.push_back(DAG.getUNDEF(SrcVT));
11203         else {
11204           SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
11205                                       MVT::f32, In.getOperand(0),
11206                                       DAG.getIntPtrConstant(1, dl));
11207           Ops.push_back(Trunc);
11208         }
11209       } else
11210         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
11211     }
11212 
11213     unsigned Opcode;
11214     if (FirstConversion == PPCISD::FCTIDZ ||
11215         FirstConversion == PPCISD::FCTIWZ)
11216       Opcode = ISD::FP_TO_SINT;
11217     else
11218       Opcode = ISD::FP_TO_UINT;
11219 
11220     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
11221     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
11222     return DAG.getNode(Opcode, dl, TargetVT, BV);
11223   }
11224   return SDValue();
11225 }
11226 
11227 /// \brief Reduce the number of loads when building a vector.
11228 ///
11229 /// Building a vector out of multiple loads can be converted to a load
11230 /// of the vector type if the loads are consecutive. If the loads are
11231 /// consecutive but in descending order, a shuffle is added at the end
11232 /// to reorder the vector.
11233 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
11234   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11235          "Should be called with a BUILD_VECTOR node");
11236 
11237   SDLoc dl(N);
11238   bool InputsAreConsecutiveLoads = true;
11239   bool InputsAreReverseConsecutive = true;
11240   unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
11241   SDValue FirstInput = N->getOperand(0);
11242   bool IsRoundOfExtLoad = false;
11243 
11244   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
11245       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
11246     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
11247     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
11248   }
11249   // Not a build vector of (possibly fp_rounded) loads.
11250   if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
11251     return SDValue();
11252 
11253   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
11254     // If any inputs are fp_round(extload), they all must be.
11255     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
11256       return SDValue();
11257 
11258     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
11259       N->getOperand(i);
11260     if (NextInput.getOpcode() != ISD::LOAD)
11261       return SDValue();
11262 
11263     SDValue PreviousInput =
11264       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
11265     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
11266     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
11267 
11268     // If any inputs are fp_round(extload), they all must be.
11269     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
11270       return SDValue();
11271 
11272     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
11273       InputsAreConsecutiveLoads = false;
11274     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
11275       InputsAreReverseConsecutive = false;
11276 
11277     // Exit early if the loads are neither consecutive nor reverse consecutive.
11278     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
11279       return SDValue();
11280   }
11281 
11282   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
11283          "The loads cannot be both consecutive and reverse consecutive.");
11284 
11285   SDValue FirstLoadOp =
11286     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
11287   SDValue LastLoadOp =
11288     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
11289                        N->getOperand(N->getNumOperands()-1);
11290 
11291   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
11292   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
11293   if (InputsAreConsecutiveLoads) {
11294     assert(LD1 && "Input needs to be a LoadSDNode.");
11295     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
11296                        LD1->getBasePtr(), LD1->getPointerInfo(),
11297                        LD1->getAlignment());
11298   }
11299   if (InputsAreReverseConsecutive) {
11300     assert(LDL && "Input needs to be a LoadSDNode.");
11301     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
11302                                LDL->getBasePtr(), LDL->getPointerInfo(),
11303                                LDL->getAlignment());
11304     SmallVector<int, 16> Ops;
11305     for (int i = N->getNumOperands() - 1; i >= 0; i--)
11306       Ops.push_back(i);
11307 
11308     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
11309                                 DAG.getUNDEF(N->getValueType(0)), Ops);
11310   }
11311   return SDValue();
11312 }
11313 
11314 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
11315                                                  DAGCombinerInfo &DCI) const {
11316   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11317          "Should be called with a BUILD_VECTOR node");
11318 
11319   SelectionDAG &DAG = DCI.DAG;
11320   SDLoc dl(N);
11321 
11322   if (!Subtarget.hasVSX())
11323     return SDValue();
11324 
11325   // The target independent DAG combiner will leave a build_vector of
11326   // float-to-int conversions intact. We can generate MUCH better code for
11327   // a float-to-int conversion of a vector of floats.
11328   SDValue FirstInput = N->getOperand(0);
11329   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
11330     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
11331     if (Reduced)
11332       return Reduced;
11333   }
11334 
11335   // If we're building a vector out of consecutive loads, just load that
11336   // vector type.
11337   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
11338   if (Reduced)
11339     return Reduced;
11340 
11341   if (N->getValueType(0) != MVT::v2f64)
11342     return SDValue();
11343 
11344   // Looking for:
11345   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
11346   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
11347       FirstInput.getOpcode() != ISD::UINT_TO_FP)
11348     return SDValue();
11349   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
11350       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
11351     return SDValue();
11352   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
11353     return SDValue();
11354 
11355   SDValue Ext1 = FirstInput.getOperand(0);
11356   SDValue Ext2 = N->getOperand(1).getOperand(0);
11357   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11358      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11359     return SDValue();
11360 
11361   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
11362   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
11363   if (!Ext1Op || !Ext2Op)
11364     return SDValue();
11365   if (Ext1.getValueType() != MVT::i32 ||
11366       Ext2.getValueType() != MVT::i32)
11367   if (Ext1.getOperand(0) != Ext2.getOperand(0))
11368     return SDValue();
11369 
11370   int FirstElem = Ext1Op->getZExtValue();
11371   int SecondElem = Ext2Op->getZExtValue();
11372   int SubvecIdx;
11373   if (FirstElem == 0 && SecondElem == 1)
11374     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
11375   else if (FirstElem == 2 && SecondElem == 3)
11376     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
11377   else
11378     return SDValue();
11379 
11380   SDValue SrcVec = Ext1.getOperand(0);
11381   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
11382     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
11383   return DAG.getNode(NodeType, dl, MVT::v2f64,
11384                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
11385 }
11386 
/// Combine [SU]INT_TO_FP nodes.  Two patterns are handled:
///   1. On P9, an i8/i16 load feeding the conversion becomes an LXSIZX
///      (load directly into a VSR), optionally sign-extended with VEXTS,
///      followed by the appropriate FCFID* conversion.
///   2. A FP -> int -> FP round trip is collapsed into FCTI*Z + FCFID*,
///      avoiding the store/load pair the default lowering would emit.
/// Returns the replacement value, or an empty SDValue if nothing applies.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  // All patterns below rely on 64-bit FP<->int moves, which are unavailable
  // with soft-float or on subtargets without 64-bit support.
  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Pattern 1: conversion fed directly by a sub-word (i8/i16) load.
  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    // Pick the conversion opcode by signedness and destination precision.
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // LXSIZX takes the load width in bytes as an extra operand.
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    // For signed conversion, we need to sign-extend the value in the VSR
    // (LXSIZX always zero-extends).
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }

  // Don't handle ppc_fp128 here or i1 conversions.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return SDValue();

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // Pattern 2: if we're converting from a float, to an int, and back to a
  // float again, then we don't need the store/load pair at all.
  // FP_TO_UINT requires FPCVT (for FCTIDUZ); FP_TO_SINT is always available.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    // Truncating float-to-int conversion, matching the signedness of the
    // original FP_TO_[SU]INT.
    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT we converted at double precision; round to single here.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
11487 
11488 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
11489 // builtins) into loads with swaps.
11490 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
11491                                               DAGCombinerInfo &DCI) const {
11492   SelectionDAG &DAG = DCI.DAG;
11493   SDLoc dl(N);
11494   SDValue Chain;
11495   SDValue Base;
11496   MachineMemOperand *MMO;
11497 
11498   switch (N->getOpcode()) {
11499   default:
11500     llvm_unreachable("Unexpected opcode for little endian VSX load");
11501   case ISD::LOAD: {
11502     LoadSDNode *LD = cast<LoadSDNode>(N);
11503     Chain = LD->getChain();
11504     Base = LD->getBasePtr();
11505     MMO = LD->getMemOperand();
11506     // If the MMO suggests this isn't a load of a full vector, leave
11507     // things alone.  For a built-in, we have to make the change for
11508     // correctness, so if there is a size problem that will be a bug.
11509     if (MMO->getSize() < 16)
11510       return SDValue();
11511     break;
11512   }
11513   case ISD::INTRINSIC_W_CHAIN: {
11514     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
11515     Chain = Intrin->getChain();
11516     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
11517     // us what we want. Get operand 2 instead.
11518     Base = Intrin->getOperand(2);
11519     MMO = Intrin->getMemOperand();
11520     break;
11521   }
11522   }
11523 
11524   MVT VecTy = N->getValueType(0).getSimpleVT();
11525 
11526   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
11527   // aligned and the type is a vector with elements up to 4 bytes
11528   if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
11529       && VecTy.getScalarSizeInBits() <= 32 ) {
11530     return SDValue();
11531   }
11532 
11533   SDValue LoadOps[] = { Chain, Base };
11534   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
11535                                          DAG.getVTList(MVT::v2f64, MVT::Other),
11536                                          LoadOps, MVT::v2f64, MMO);
11537 
11538   DCI.AddToWorklist(Load.getNode());
11539   Chain = Load.getValue(1);
11540   SDValue Swap = DAG.getNode(
11541       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
11542   DCI.AddToWorklist(Swap.getNode());
11543 
11544   // Add a bitcast if the resulting load type doesn't match v2f64.
11545   if (VecTy != MVT::v2f64) {
11546     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
11547     DCI.AddToWorklist(N.getNode());
11548     // Package {bitcast value, swap's chain} to match Load's shape.
11549     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
11550                        N, Swap.getValue(1));
11551   }
11552 
11553   return Swap;
11554 }
11555 
11556 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
11557 // builtins) into stores with swaps.
11558 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
11559                                                DAGCombinerInfo &DCI) const {
11560   SelectionDAG &DAG = DCI.DAG;
11561   SDLoc dl(N);
11562   SDValue Chain;
11563   SDValue Base;
11564   unsigned SrcOpnd;
11565   MachineMemOperand *MMO;
11566 
11567   switch (N->getOpcode()) {
11568   default:
11569     llvm_unreachable("Unexpected opcode for little endian VSX store");
11570   case ISD::STORE: {
11571     StoreSDNode *ST = cast<StoreSDNode>(N);
11572     Chain = ST->getChain();
11573     Base = ST->getBasePtr();
11574     MMO = ST->getMemOperand();
11575     SrcOpnd = 1;
11576     // If the MMO suggests this isn't a store of a full vector, leave
11577     // things alone.  For a built-in, we have to make the change for
11578     // correctness, so if there is a size problem that will be a bug.
11579     if (MMO->getSize() < 16)
11580       return SDValue();
11581     break;
11582   }
11583   case ISD::INTRINSIC_VOID: {
11584     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
11585     Chain = Intrin->getChain();
11586     // Intrin->getBasePtr() oddly does not get what we want.
11587     Base = Intrin->getOperand(3);
11588     MMO = Intrin->getMemOperand();
11589     SrcOpnd = 2;
11590     break;
11591   }
11592   }
11593 
11594   SDValue Src = N->getOperand(SrcOpnd);
11595   MVT VecTy = Src.getValueType().getSimpleVT();
11596 
11597   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
11598   // aligned and the type is a vector with elements up to 4 bytes
11599   if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
11600       && VecTy.getScalarSizeInBits() <= 32 ) {
11601     return SDValue();
11602   }
11603 
11604   // All stores are done as v2f64 and possible bit cast.
11605   if (VecTy != MVT::v2f64) {
11606     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
11607     DCI.AddToWorklist(Src.getNode());
11608   }
11609 
11610   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
11611                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
11612   DCI.AddToWorklist(Swap.getNode());
11613   Chain = Swap.getValue(1);
11614   SDValue StoreOps[] = { Chain, Swap, Base };
11615   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
11616                                           DAG.getVTList(MVT::Other),
11617                                           StoreOps, VecTy, MMO);
11618   DCI.AddToWorklist(Store.getNode());
11619   return Store;
11620 }
11621 
11622 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
11623                                              DAGCombinerInfo &DCI) const {
11624   SelectionDAG &DAG = DCI.DAG;
11625   SDLoc dl(N);
11626   switch (N->getOpcode()) {
11627   default: break;
11628   case ISD::SHL:
11629     return combineSHL(N, DCI);
11630   case ISD::SRA:
11631     return combineSRA(N, DCI);
11632   case ISD::SRL:
11633     return combineSRL(N, DCI);
11634   case PPCISD::SHL:
11635     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
11636         return N->getOperand(0);
11637     break;
11638   case PPCISD::SRL:
11639     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
11640         return N->getOperand(0);
11641     break;
11642   case PPCISD::SRA:
11643     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
11644       if (C->isNullValue() ||   //  0 >>s V -> 0.
11645           C->isAllOnesValue())    // -1 >>s V -> -1.
11646         return N->getOperand(0);
11647     }
11648     break;
11649   case ISD::SIGN_EXTEND:
11650   case ISD::ZERO_EXTEND:
11651   case ISD::ANY_EXTEND:
11652     return DAGCombineExtBoolTrunc(N, DCI);
11653   case ISD::TRUNCATE:
11654   case ISD::SETCC:
11655   case ISD::SELECT_CC:
11656     return DAGCombineTruncBoolExt(N, DCI);
11657   case ISD::SINT_TO_FP:
11658   case ISD::UINT_TO_FP:
11659     return combineFPToIntToFP(N, DCI);
11660   case ISD::STORE: {
11661     EVT Op1VT = N->getOperand(1).getValueType();
11662     bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
11663       (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
11664 
11665     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
11666     if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
11667         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
11668         ValidTypeForStoreFltAsInt &&
11669         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
11670       SDValue Val = N->getOperand(1).getOperand(0);
11671       if (Val.getValueType() == MVT::f32) {
11672         Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
11673         DCI.AddToWorklist(Val.getNode());
11674       }
11675       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
11676       DCI.AddToWorklist(Val.getNode());
11677 
11678       if (Op1VT == MVT::i32) {
11679         SDValue Ops[] = {
11680           N->getOperand(0), Val, N->getOperand(2),
11681           DAG.getValueType(N->getOperand(1).getValueType())
11682         };
11683 
11684         Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
11685                 DAG.getVTList(MVT::Other), Ops,
11686                 cast<StoreSDNode>(N)->getMemoryVT(),
11687                 cast<StoreSDNode>(N)->getMemOperand());
11688       } else {
11689         unsigned WidthInBytes =
11690           N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
11691         SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
11692 
11693         SDValue Ops[] = {
11694           N->getOperand(0), Val, N->getOperand(2), WidthConst,
11695           DAG.getValueType(N->getOperand(1).getValueType())
11696         };
11697         Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
11698                                       DAG.getVTList(MVT::Other), Ops,
11699                                       cast<StoreSDNode>(N)->getMemoryVT(),
11700                                       cast<StoreSDNode>(N)->getMemOperand());
11701       }
11702 
11703       DCI.AddToWorklist(Val.getNode());
11704       return Val;
11705     }
11706 
11707     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
11708     if (cast<StoreSDNode>(N)->isUnindexed() &&
11709         N->getOperand(1).getOpcode() == ISD::BSWAP &&
11710         N->getOperand(1).getNode()->hasOneUse() &&
11711         (N->getOperand(1).getValueType() == MVT::i32 ||
11712          N->getOperand(1).getValueType() == MVT::i16 ||
11713          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
11714           N->getOperand(1).getValueType() == MVT::i64))) {
11715       SDValue BSwapOp = N->getOperand(1).getOperand(0);
11716       // Do an any-extend to 32-bits if this is a half-word input.
11717       if (BSwapOp.getValueType() == MVT::i16)
11718         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
11719 
11720       // If the type of BSWAP operand is wider than stored memory width
11721       // it need to be shifted to the right side before STBRX.
11722       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
11723       if (Op1VT.bitsGT(mVT)) {
11724         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
11725         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
11726                               DAG.getConstant(Shift, dl, MVT::i32));
11727         // Need to truncate if this is a bswap of i64 stored as i32/i16.
11728         if (Op1VT == MVT::i64)
11729           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
11730       }
11731 
11732       SDValue Ops[] = {
11733         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
11734       };
11735       return
11736         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
11737                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
11738                                 cast<StoreSDNode>(N)->getMemOperand());
11739     }
11740 
11741     // For little endian, VSX stores require generating xxswapd/lxvd2x.
11742     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
11743     EVT VT = N->getOperand(1).getValueType();
11744     if (VT.isSimple()) {
11745       MVT StoreVT = VT.getSimpleVT();
11746       if (Subtarget.needsSwapsForVSXMemOps() &&
11747           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
11748            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
11749         return expandVSXStoreForLE(N, DCI);
11750     }
11751     break;
11752   }
11753   case ISD::LOAD: {
11754     LoadSDNode *LD = cast<LoadSDNode>(N);
11755     EVT VT = LD->getValueType(0);
11756 
11757     // For little endian, VSX loads require generating lxvd2x/xxswapd.
11758     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
11759     if (VT.isSimple()) {
11760       MVT LoadVT = VT.getSimpleVT();
11761       if (Subtarget.needsSwapsForVSXMemOps() &&
11762           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
11763            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
11764         return expandVSXLoadForLE(N, DCI);
11765     }
11766 
11767     // We sometimes end up with a 64-bit integer load, from which we extract
11768     // two single-precision floating-point numbers. This happens with
11769     // std::complex<float>, and other similar structures, because of the way we
11770     // canonicalize structure copies. However, if we lack direct moves,
11771     // then the final bitcasts from the extracted integer values to the
11772     // floating-point numbers turn into store/load pairs. Even with direct moves,
11773     // just loading the two floating-point numbers is likely better.
11774     auto ReplaceTwoFloatLoad = [&]() {
11775       if (VT != MVT::i64)
11776         return false;
11777 
11778       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
11779           LD->isVolatile())
11780         return false;
11781 
11782       //  We're looking for a sequence like this:
11783       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
11784       //      t16: i64 = srl t13, Constant:i32<32>
11785       //    t17: i32 = truncate t16
11786       //  t18: f32 = bitcast t17
11787       //    t19: i32 = truncate t13
11788       //  t20: f32 = bitcast t19
11789 
11790       if (!LD->hasNUsesOfValue(2, 0))
11791         return false;
11792 
11793       auto UI = LD->use_begin();
11794       while (UI.getUse().getResNo() != 0) ++UI;
11795       SDNode *Trunc = *UI++;
11796       while (UI.getUse().getResNo() != 0) ++UI;
11797       SDNode *RightShift = *UI;
11798       if (Trunc->getOpcode() != ISD::TRUNCATE)
11799         std::swap(Trunc, RightShift);
11800 
11801       if (Trunc->getOpcode() != ISD::TRUNCATE ||
11802           Trunc->getValueType(0) != MVT::i32 ||
11803           !Trunc->hasOneUse())
11804         return false;
11805       if (RightShift->getOpcode() != ISD::SRL ||
11806           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
11807           RightShift->getConstantOperandVal(1) != 32 ||
11808           !RightShift->hasOneUse())
11809         return false;
11810 
11811       SDNode *Trunc2 = *RightShift->use_begin();
11812       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
11813           Trunc2->getValueType(0) != MVT::i32 ||
11814           !Trunc2->hasOneUse())
11815         return false;
11816 
11817       SDNode *Bitcast = *Trunc->use_begin();
11818       SDNode *Bitcast2 = *Trunc2->use_begin();
11819 
11820       if (Bitcast->getOpcode() != ISD::BITCAST ||
11821           Bitcast->getValueType(0) != MVT::f32)
11822         return false;
11823       if (Bitcast2->getOpcode() != ISD::BITCAST ||
11824           Bitcast2->getValueType(0) != MVT::f32)
11825         return false;
11826 
11827       if (Subtarget.isLittleEndian())
11828         std::swap(Bitcast, Bitcast2);
11829 
11830       // Bitcast has the second float (in memory-layout order) and Bitcast2
11831       // has the first one.
11832 
11833       SDValue BasePtr = LD->getBasePtr();
11834       if (LD->isIndexed()) {
11835         assert(LD->getAddressingMode() == ISD::PRE_INC &&
11836                "Non-pre-inc AM on PPC?");
11837         BasePtr =
11838           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11839                       LD->getOffset());
11840       }
11841 
11842       auto MMOFlags =
11843           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
11844       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
11845                                       LD->getPointerInfo(), LD->getAlignment(),
11846                                       MMOFlags, LD->getAAInfo());
11847       SDValue AddPtr =
11848         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
11849                     BasePtr, DAG.getIntPtrConstant(4, dl));
11850       SDValue FloatLoad2 = DAG.getLoad(
11851           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
11852           LD->getPointerInfo().getWithOffset(4),
11853           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
11854 
11855       if (LD->isIndexed()) {
11856         // Note that DAGCombine should re-form any pre-increment load(s) from
11857         // what is produced here if that makes sense.
11858         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
11859       }
11860 
11861       DCI.CombineTo(Bitcast2, FloatLoad);
11862       DCI.CombineTo(Bitcast, FloatLoad2);
11863 
11864       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
11865                                     SDValue(FloatLoad2.getNode(), 1));
11866       return true;
11867     };
11868 
11869     if (ReplaceTwoFloatLoad())
11870       return SDValue(N, 0);
11871 
11872     EVT MemVT = LD->getMemoryVT();
11873     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
11874     unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
11875     Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
11876     unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
11877     if (LD->isUnindexed() && VT.isVector() &&
11878         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
11879           // P8 and later hardware should just use LOAD.
11880           !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
11881                                        VT == MVT::v4i32 || VT == MVT::v4f32)) ||
11882          (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
11883           LD->getAlignment() >= ScalarABIAlignment)) &&
11884         LD->getAlignment() < ABIAlignment) {
11885       // This is a type-legal unaligned Altivec or QPX load.
11886       SDValue Chain = LD->getChain();
11887       SDValue Ptr = LD->getBasePtr();
11888       bool isLittleEndian = Subtarget.isLittleEndian();
11889 
11890       // This implements the loading of unaligned vectors as described in
11891       // the venerable Apple Velocity Engine overview. Specifically:
11892       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
11893       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
11894       //
11895       // The general idea is to expand a sequence of one or more unaligned
11896       // loads into an alignment-based permutation-control instruction (lvsl
11897       // or lvsr), a series of regular vector loads (which always truncate
11898       // their input address to an aligned address), and a series of
11899       // permutations.  The results of these permutations are the requested
11900       // loaded values.  The trick is that the last "extra" load is not taken
11901       // from the address you might suspect (sizeof(vector) bytes after the
11902       // last requested load), but rather sizeof(vector) - 1 bytes after the
11903       // last requested vector. The point of this is to avoid a page fault if
11904       // the base address happened to be aligned. This works because if the
11905       // base address is aligned, then adding less than a full vector length
11906       // will cause the last vector in the sequence to be (re)loaded.
11907       // Otherwise, the next vector will be fetched as you might suspect was
11908       // necessary.
11909 
11910       // We might be able to reuse the permutation generation from
11911       // a different base address offset from this one by an aligned amount.
11912       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
11913       // optimization later.
11914       Intrinsic::ID Intr, IntrLD, IntrPerm;
11915       MVT PermCntlTy, PermTy, LDTy;
11916       if (Subtarget.hasAltivec()) {
11917         Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
11918                                  Intrinsic::ppc_altivec_lvsl;
11919         IntrLD = Intrinsic::ppc_altivec_lvx;
11920         IntrPerm = Intrinsic::ppc_altivec_vperm;
11921         PermCntlTy = MVT::v16i8;
11922         PermTy = MVT::v4i32;
11923         LDTy = MVT::v4i32;
11924       } else {
11925         Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
11926                                        Intrinsic::ppc_qpx_qvlpcls;
11927         IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
11928                                        Intrinsic::ppc_qpx_qvlfs;
11929         IntrPerm = Intrinsic::ppc_qpx_qvfperm;
11930         PermCntlTy = MVT::v4f64;
11931         PermTy = MVT::v4f64;
11932         LDTy = MemVT.getSimpleVT();
11933       }
11934 
11935       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
11936 
11937       // Create the new MMO for the new base load. It is like the original MMO,
11938       // but represents an area in memory almost twice the vector size centered
11939       // on the original address. If the address is unaligned, we might start
11940       // reading up to (sizeof(vector)-1) bytes below the address of the
11941       // original unaligned load.
11942       MachineFunction &MF = DAG.getMachineFunction();
11943       MachineMemOperand *BaseMMO =
11944         MF.getMachineMemOperand(LD->getMemOperand(),
11945                                 -(long)MemVT.getStoreSize()+1,
11946                                 2*MemVT.getStoreSize()-1);
11947 
11948       // Create the new base load.
11949       SDValue LDXIntID =
11950           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
11951       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
11952       SDValue BaseLoad =
11953         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11954                                 DAG.getVTList(PermTy, MVT::Other),
11955                                 BaseLoadOps, LDTy, BaseMMO);
11956 
11957       // Note that the value of IncOffset (which is provided to the next
11958       // load's pointer info offset value, and thus used to calculate the
11959       // alignment), and the value of IncValue (which is actually used to
11960       // increment the pointer value) are different! This is because we
11961       // require the next load to appear to be aligned, even though it
11962       // is actually offset from the base pointer by a lesser amount.
11963       int IncOffset = VT.getSizeInBits() / 8;
11964       int IncValue = IncOffset;
11965 
11966       // Walk (both up and down) the chain looking for another load at the real
11967       // (aligned) offset (the alignment of the other load does not matter in
11968       // this case). If found, then do not use the offset reduction trick, as
11969       // that will prevent the loads from being later combined (as they would
11970       // otherwise be duplicates).
11971       if (!findConsecutiveLoad(LD, DAG))
11972         --IncValue;
11973 
11974       SDValue Increment =
11975           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
11976       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
11977 
11978       MachineMemOperand *ExtraMMO =
11979         MF.getMachineMemOperand(LD->getMemOperand(),
11980                                 1, 2*MemVT.getStoreSize()-1);
11981       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
11982       SDValue ExtraLoad =
11983         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11984                                 DAG.getVTList(PermTy, MVT::Other),
11985                                 ExtraLoadOps, LDTy, ExtraMMO);
11986 
11987       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
11988         BaseLoad.getValue(1), ExtraLoad.getValue(1));
11989 
11990       // Because vperm has a big-endian bias, we must reverse the order
11991       // of the input vectors and complement the permute control vector
11992       // when generating little endian code.  We have already handled the
11993       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
11994       // and ExtraLoad here.
11995       SDValue Perm;
11996       if (isLittleEndian)
11997         Perm = BuildIntrinsicOp(IntrPerm,
11998                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
11999       else
12000         Perm = BuildIntrinsicOp(IntrPerm,
12001                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
12002 
12003       if (VT != PermTy)
12004         Perm = Subtarget.hasAltivec() ?
12005                  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
12006                  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
12007                                DAG.getTargetConstant(1, dl, MVT::i64));
12008                                // second argument is 1 because this rounding
12009                                // is always exact.
12010 
12011       // The output of the permutation is our loaded result, the TokenFactor is
12012       // our new chain.
12013       DCI.CombineTo(N, Perm, TF);
12014       return SDValue(N, 0);
12015     }
12016     }
12017     break;
12018     case ISD::INTRINSIC_WO_CHAIN: {
12019       bool isLittleEndian = Subtarget.isLittleEndian();
12020       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
12021       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
12022                                            : Intrinsic::ppc_altivec_lvsl);
12023       if ((IID == Intr ||
12024            IID == Intrinsic::ppc_qpx_qvlpcld  ||
12025            IID == Intrinsic::ppc_qpx_qvlpcls) &&
12026         N->getOperand(1)->getOpcode() == ISD::ADD) {
12027         SDValue Add = N->getOperand(1);
12028 
12029         int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
12030                    5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
12031 
12032         if (DAG.MaskedValueIsZero(Add->getOperand(1),
12033                                   APInt::getAllOnesValue(Bits /* alignment */)
12034                                       .zext(Add.getScalarValueSizeInBits()))) {
12035           SDNode *BasePtr = Add->getOperand(0).getNode();
12036           for (SDNode::use_iterator UI = BasePtr->use_begin(),
12037                                     UE = BasePtr->use_end();
12038                UI != UE; ++UI) {
12039             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12040                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
12041               // We've found another LVSL/LVSR, and this address is an aligned
12042               // multiple of that one. The results will be the same, so use the
12043               // one we've just found instead.
12044 
12045               return SDValue(*UI, 0);
12046             }
12047           }
12048         }
12049 
12050         if (isa<ConstantSDNode>(Add->getOperand(1))) {
12051           SDNode *BasePtr = Add->getOperand(0).getNode();
12052           for (SDNode::use_iterator UI = BasePtr->use_begin(),
12053                UE = BasePtr->use_end(); UI != UE; ++UI) {
12054             if (UI->getOpcode() == ISD::ADD &&
12055                 isa<ConstantSDNode>(UI->getOperand(1)) &&
12056                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
12057                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
12058                 (1ULL << Bits) == 0) {
12059               SDNode *OtherAdd = *UI;
12060               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
12061                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
12062                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12063                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
12064                   return SDValue(*VI, 0);
12065                 }
12066               }
12067             }
12068           }
12069         }
12070       }
12071     }
12072 
12073     break;
12074   case ISD::INTRINSIC_W_CHAIN:
12075     // For little endian, VSX loads require generating lxvd2x/xxswapd.
12076     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
12077     if (Subtarget.needsSwapsForVSXMemOps()) {
12078       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12079       default:
12080         break;
12081       case Intrinsic::ppc_vsx_lxvw4x:
12082       case Intrinsic::ppc_vsx_lxvd2x:
12083         return expandVSXLoadForLE(N, DCI);
12084       }
12085     }
12086     break;
12087   case ISD::INTRINSIC_VOID:
12088     // For little endian, VSX stores require generating xxswapd/stxvd2x.
12089     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
12090     if (Subtarget.needsSwapsForVSXMemOps()) {
12091       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12092       default:
12093         break;
12094       case Intrinsic::ppc_vsx_stxvw4x:
12095       case Intrinsic::ppc_vsx_stxvd2x:
12096         return expandVSXStoreForLE(N, DCI);
12097       }
12098     }
12099     break;
12100   case ISD::BSWAP:
12101     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
12102     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
12103         N->getOperand(0).hasOneUse() &&
12104         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
12105          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
12106           N->getValueType(0) == MVT::i64))) {
12107       SDValue Load = N->getOperand(0);
12108       LoadSDNode *LD = cast<LoadSDNode>(Load);
12109       // Create the byte-swapping load.
12110       SDValue Ops[] = {
12111         LD->getChain(),    // Chain
12112         LD->getBasePtr(),  // Ptr
12113         DAG.getValueType(N->getValueType(0)) // VT
12114       };
12115       SDValue BSLoad =
12116         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
12117                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
12118                                               MVT::i64 : MVT::i32, MVT::Other),
12119                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
12120 
12121       // If this is an i16 load, insert the truncate.
12122       SDValue ResVal = BSLoad;
12123       if (N->getValueType(0) == MVT::i16)
12124         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
12125 
12126       // First, combine the bswap away.  This makes the value produced by the
12127       // load dead.
12128       DCI.CombineTo(N, ResVal);
12129 
12130       // Next, combine the load away, we give it a bogus result value but a real
12131       // chain result.  The result value is dead because the bswap is dead.
12132       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
12133 
12134       // Return N so it doesn't get rechecked!
12135       return SDValue(N, 0);
12136     }
12137     break;
12138   case PPCISD::VCMP:
12139     // If a VCMPo node already exists with exactly the same operands as this
12140     // node, use its result instead of this node (VCMPo computes both a CR6 and
12141     // a normal output).
12142     //
12143     if (!N->getOperand(0).hasOneUse() &&
12144         !N->getOperand(1).hasOneUse() &&
12145         !N->getOperand(2).hasOneUse()) {
12146 
12147       // Scan all of the users of the LHS, looking for VCMPo's that match.
12148       SDNode *VCMPoNode = nullptr;
12149 
12150       SDNode *LHSN = N->getOperand(0).getNode();
12151       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
12152            UI != E; ++UI)
12153         if (UI->getOpcode() == PPCISD::VCMPo &&
12154             UI->getOperand(1) == N->getOperand(1) &&
12155             UI->getOperand(2) == N->getOperand(2) &&
12156             UI->getOperand(0) == N->getOperand(0)) {
12157           VCMPoNode = *UI;
12158           break;
12159         }
12160 
12161       // If there is no VCMPo node, or if the flag value has a single use, don't
12162       // transform this.
12163       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
12164         break;
12165 
12166       // Look at the (necessarily single) use of the flag value.  If it has a
12167       // chain, this transformation is more complex.  Note that multiple things
12168       // could use the value result, which we should ignore.
12169       SDNode *FlagUser = nullptr;
12170       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
12171            FlagUser == nullptr; ++UI) {
12172         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
12173         SDNode *User = *UI;
12174         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
12175           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
12176             FlagUser = User;
12177             break;
12178           }
12179         }
12180       }
12181 
12182       // If the user is a MFOCRF instruction, we know this is safe.
12183       // Otherwise we give up for right now.
12184       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
12185         return SDValue(VCMPoNode, 0);
12186     }
12187     break;
12188   case ISD::BRCOND: {
12189     SDValue Cond = N->getOperand(1);
12190     SDValue Target = N->getOperand(2);
12191 
12192     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12193         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
12194           Intrinsic::ppc_is_decremented_ctr_nonzero) {
12195 
12196       // We now need to make the intrinsic dead (it cannot be instruction
12197       // selected).
12198       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
12199       assert(Cond.getNode()->hasOneUse() &&
12200              "Counter decrement has more than one use");
12201 
12202       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
12203                          N->getOperand(0), Target);
12204     }
12205   }
12206   break;
12207   case ISD::BR_CC: {
12208     // If this is a branch on an altivec predicate comparison, lower this so
12209     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
12210     // lowering is done pre-legalize, because the legalizer lowers the predicate
12211     // compare down to code that is difficult to reassemble.
12212     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
12213     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
12214 
12215     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
12216     // value. If so, pass-through the AND to get to the intrinsic.
12217     if (LHS.getOpcode() == ISD::AND &&
12218         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12219         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
12220           Intrinsic::ppc_is_decremented_ctr_nonzero &&
12221         isa<ConstantSDNode>(LHS.getOperand(1)) &&
12222         !isNullConstant(LHS.getOperand(1)))
12223       LHS = LHS.getOperand(0);
12224 
12225     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12226         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
12227           Intrinsic::ppc_is_decremented_ctr_nonzero &&
12228         isa<ConstantSDNode>(RHS)) {
12229       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
12230              "Counter decrement comparison is not EQ or NE");
12231 
12232       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
12233       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
12234                     (CC == ISD::SETNE && !Val);
12235 
12236       // We now need to make the intrinsic dead (it cannot be instruction
12237       // selected).
12238       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
12239       assert(LHS.getNode()->hasOneUse() &&
12240              "Counter decrement has more than one use");
12241 
12242       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
12243                          N->getOperand(0), N->getOperand(4));
12244     }
12245 
12246     int CompareOpc;
12247     bool isDot;
12248 
12249     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12250         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
12251         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
12252       assert(isDot && "Can't compare against a vector result!");
12253 
12254       // If this is a comparison against something other than 0/1, then we know
12255       // that the condition is never/always true.
12256       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
12257       if (Val != 0 && Val != 1) {
12258         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
12259           return N->getOperand(0);
12260         // Always !=, turn it into an unconditional branch.
12261         return DAG.getNode(ISD::BR, dl, MVT::Other,
12262                            N->getOperand(0), N->getOperand(4));
12263       }
12264 
12265       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
12266 
12267       // Create the PPCISD altivec 'dot' comparison node.
12268       SDValue Ops[] = {
12269         LHS.getOperand(2),  // LHS of compare
12270         LHS.getOperand(3),  // RHS of compare
12271         DAG.getConstant(CompareOpc, dl, MVT::i32)
12272       };
12273       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
12274       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
12275 
12276       // Unpack the result based on how the target uses it.
12277       PPC::Predicate CompOpc;
12278       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
12279       default:  // Can't happen, don't crash on invalid number though.
12280       case 0:   // Branch on the value of the EQ bit of CR6.
12281         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
12282         break;
12283       case 1:   // Branch on the inverted value of the EQ bit of CR6.
12284         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
12285         break;
12286       case 2:   // Branch on the value of the LT bit of CR6.
12287         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
12288         break;
12289       case 3:   // Branch on the inverted value of the LT bit of CR6.
12290         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
12291         break;
12292       }
12293 
12294       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
12295                          DAG.getConstant(CompOpc, dl, MVT::i32),
12296                          DAG.getRegister(PPC::CR6, MVT::i32),
12297                          N->getOperand(4), CompNode.getValue(1));
12298     }
12299     break;
12300   }
12301   case ISD::BUILD_VECTOR:
12302     return DAGCombineBuildVector(N, DCI);
12303   }
12304 
12305   return SDValue();
12306 }
12307 
12308 SDValue
12309 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
12310                                   SelectionDAG &DAG,
12311                                   std::vector<SDNode *> *Created) const {
12312   // fold (sdiv X, pow2)
12313   EVT VT = N->getValueType(0);
12314   if (VT == MVT::i64 && !Subtarget.isPPC64())
12315     return SDValue();
12316   if ((VT != MVT::i32 && VT != MVT::i64) ||
12317       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
12318     return SDValue();
12319 
12320   SDLoc DL(N);
12321   SDValue N0 = N->getOperand(0);
12322 
12323   bool IsNegPow2 = (-Divisor).isPowerOf2();
12324   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
12325   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
12326 
12327   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
12328   if (Created)
12329     Created->push_back(Op.getNode());
12330 
12331   if (IsNegPow2) {
12332     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
12333     if (Created)
12334       Created->push_back(Op.getNode());
12335   }
12336 
12337   return Op;
12338 }
12339 
12340 //===----------------------------------------------------------------------===//
12341 // Inline Assembly Support
12342 //===----------------------------------------------------------------------===//
12343 
// Report bits of target-specific nodes that are statically known to be zero,
// so generic DAG combines can exploit them.
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  // Start from "nothing known"; only the cases below contribute facts.
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    // (Operand 2 carries the memory VT of the byte-reversed load.)
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    // Altivec predicate-form compares (*_p) produce only 0 or 1.
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}
12383 
12384 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
12385   switch (Subtarget.getDarwinDirective()) {
12386   default: break;
12387   case PPC::DIR_970:
12388   case PPC::DIR_PWR4:
12389   case PPC::DIR_PWR5:
12390   case PPC::DIR_PWR5X:
12391   case PPC::DIR_PWR6:
12392   case PPC::DIR_PWR6X:
12393   case PPC::DIR_PWR7:
12394   case PPC::DIR_PWR8:
12395   case PPC::DIR_PWR9: {
12396     if (!ML)
12397       break;
12398 
12399     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12400 
12401     // For small loops (between 5 and 8 instructions), align to a 32-byte
12402     // boundary so that the entire loop fits in one instruction-cache line.
12403     uint64_t LoopSize = 0;
12404     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
12405       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
12406         LoopSize += TII->getInstSizeInBytes(*J);
12407         if (LoopSize > 32)
12408           break;
12409       }
12410 
12411     if (LoopSize > 16 && LoopSize <= 32)
12412       return 5;
12413 
12414     break;
12415   }
12416   }
12417 
12418   return TargetLowering::getPrefLoopAlignment(ML);
12419 }
12420 
12421 /// getConstraintType - Given a constraint, return the type of
12422 /// constraint it is for this target.
12423 PPCTargetLowering::ConstraintType
12424 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
12425   if (Constraint.size() == 1) {
12426     switch (Constraint[0]) {
12427     default: break;
12428     case 'b':
12429     case 'r':
12430     case 'f':
12431     case 'd':
12432     case 'v':
12433     case 'y':
12434       return C_RegisterClass;
12435     case 'Z':
12436       // FIXME: While Z does indicate a memory constraint, it specifically
12437       // indicates an r+r address (used in conjunction with the 'y' modifier
12438       // in the replacement string). Currently, we're forcing the base
12439       // register to be r0 in the asm printer (which is interpreted as zero)
12440       // and forming the complete address in the second register. This is
12441       // suboptimal.
12442       return C_Memory;
12443     }
12444   } else if (Constraint == "wc") { // individual CR bits.
12445     return C_RegisterClass;
12446   } else if (Constraint == "wa" || Constraint == "wd" ||
12447              Constraint == "wf" || Constraint == "ws") {
12448     return C_RegisterClass; // VSX registers.
12449   }
12450   return TargetLowering::getConstraintType(Constraint);
12451 }
12452 
12453 /// Examine constraint type and operand type and determine a weight value.
12454 /// This object must already have been set up with the operand type
12455 /// and the current alternative constraint selected.
12456 TargetLowering::ConstraintWeight
12457 PPCTargetLowering::getSingleConstraintMatchWeight(
12458     AsmOperandInfo &info, const char *constraint) const {
12459   ConstraintWeight weight = CW_Invalid;
12460   Value *CallOperandVal = info.CallOperandVal;
12461     // If we don't have a value, we can't do a match,
12462     // but allow it at the lowest weight.
12463   if (!CallOperandVal)
12464     return CW_Default;
12465   Type *type = CallOperandVal->getType();
12466 
12467   // Look at the constraint type.
12468   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
12469     return CW_Register; // an individual CR bit.
12470   else if ((StringRef(constraint) == "wa" ||
12471             StringRef(constraint) == "wd" ||
12472             StringRef(constraint) == "wf") &&
12473            type->isVectorTy())
12474     return CW_Register;
12475   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
12476     return CW_Register;
12477 
12478   switch (*constraint) {
12479   default:
12480     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
12481     break;
12482   case 'b':
12483     if (type->isIntegerTy())
12484       weight = CW_Register;
12485     break;
12486   case 'f':
12487     if (type->isFloatTy())
12488       weight = CW_Register;
12489     break;
12490   case 'd':
12491     if (type->isDoubleTy())
12492       weight = CW_Register;
12493     break;
12494   case 'v':
12495     if (type->isVectorTy())
12496       weight = CW_Register;
12497     break;
12498   case 'y':
12499     weight = CW_Register;
12500     break;
12501   case 'Z':
12502     weight = CW_Memory;
12503     break;
12504   }
12505   return weight;
12506 }
12507 
12508 std::pair<unsigned, const TargetRegisterClass *>
12509 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
12510                                                 StringRef Constraint,
12511                                                 MVT VT) const {
12512   if (Constraint.size() == 1) {
12513     // GCC RS6000 Constraint Letters
12514     switch (Constraint[0]) {
12515     case 'b':   // R1-R31
12516       if (VT == MVT::i64 && Subtarget.isPPC64())
12517         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
12518       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
12519     case 'r':   // R0-R31
12520       if (VT == MVT::i64 && Subtarget.isPPC64())
12521         return std::make_pair(0U, &PPC::G8RCRegClass);
12522       return std::make_pair(0U, &PPC::GPRCRegClass);
12523     // 'd' and 'f' constraints are both defined to be "the floating point
12524     // registers", where one is for 32-bit and the other for 64-bit. We don't
12525     // really care overly much here so just give them all the same reg classes.
12526     case 'd':
12527     case 'f':
12528       if (VT == MVT::f32 || VT == MVT::i32)
12529         return std::make_pair(0U, &PPC::F4RCRegClass);
12530       if (VT == MVT::f64 || VT == MVT::i64)
12531         return std::make_pair(0U, &PPC::F8RCRegClass);
12532       if (VT == MVT::v4f64 && Subtarget.hasQPX())
12533         return std::make_pair(0U, &PPC::QFRCRegClass);
12534       if (VT == MVT::v4f32 && Subtarget.hasQPX())
12535         return std::make_pair(0U, &PPC::QSRCRegClass);
12536       break;
12537     case 'v':
12538       if (VT == MVT::v4f64 && Subtarget.hasQPX())
12539         return std::make_pair(0U, &PPC::QFRCRegClass);
12540       if (VT == MVT::v4f32 && Subtarget.hasQPX())
12541         return std::make_pair(0U, &PPC::QSRCRegClass);
12542       if (Subtarget.hasAltivec())
12543         return std::make_pair(0U, &PPC::VRRCRegClass);
12544     case 'y':   // crrc
12545       return std::make_pair(0U, &PPC::CRRCRegClass);
12546     }
12547   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
12548     // An individual CR bit.
12549     return std::make_pair(0U, &PPC::CRBITRCRegClass);
12550   } else if ((Constraint == "wa" || Constraint == "wd" ||
12551              Constraint == "wf") && Subtarget.hasVSX()) {
12552     return std::make_pair(0U, &PPC::VSRCRegClass);
12553   } else if (Constraint == "ws" && Subtarget.hasVSX()) {
12554     if (VT == MVT::f32 && Subtarget.hasP8Vector())
12555       return std::make_pair(0U, &PPC::VSSRCRegClass);
12556     else
12557       return std::make_pair(0U, &PPC::VSFRCRegClass);
12558   }
12559 
12560   std::pair<unsigned, const TargetRegisterClass *> R =
12561       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
12562 
12563   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
12564   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
12565   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
12566   // register.
12567   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
12568   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
12569   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
12570       PPC::GPRCRegClass.contains(R.first))
12571     return std::make_pair(TRI->getMatchingSuperReg(R.first,
12572                             PPC::sub_32, &PPC::G8RCRegClass),
12573                           &PPC::G8RCRegClass);
12574 
12575   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
12576   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
12577     R.first = PPC::CR0;
12578     R.second = &PPC::CRRCRegClass;
12579   }
12580 
12581   return R;
12582 }
12583 
12584 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
12585 /// vector.  If it is invalid, don't add anything to Ops.
12586 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
12587                                                      std::string &Constraint,
12588                                                      std::vector<SDValue>&Ops,
12589                                                      SelectionDAG &DAG) const {
12590   SDValue Result;
12591 
12592   // Only support length 1 constraints.
12593   if (Constraint.length() > 1) return;
12594 
12595   char Letter = Constraint[0];
12596   switch (Letter) {
12597   default: break;
12598   case 'I':
12599   case 'J':
12600   case 'K':
12601   case 'L':
12602   case 'M':
12603   case 'N':
12604   case 'O':
12605   case 'P': {
12606     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
12607     if (!CST) return; // Must be an immediate to match.
12608     SDLoc dl(Op);
12609     int64_t Value = CST->getSExtValue();
12610     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
12611                          // numbers are printed as such.
12612     switch (Letter) {
12613     default: llvm_unreachable("Unknown constraint letter!");
12614     case 'I':  // "I" is a signed 16-bit constant.
12615       if (isInt<16>(Value))
12616         Result = DAG.getTargetConstant(Value, dl, TCVT);
12617       break;
12618     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
12619       if (isShiftedUInt<16, 16>(Value))
12620         Result = DAG.getTargetConstant(Value, dl, TCVT);
12621       break;
12622     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
12623       if (isShiftedInt<16, 16>(Value))
12624         Result = DAG.getTargetConstant(Value, dl, TCVT);
12625       break;
12626     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
12627       if (isUInt<16>(Value))
12628         Result = DAG.getTargetConstant(Value, dl, TCVT);
12629       break;
12630     case 'M':  // "M" is a constant that is greater than 31.
12631       if (Value > 31)
12632         Result = DAG.getTargetConstant(Value, dl, TCVT);
12633       break;
12634     case 'N':  // "N" is a positive constant that is an exact power of two.
12635       if (Value > 0 && isPowerOf2_64(Value))
12636         Result = DAG.getTargetConstant(Value, dl, TCVT);
12637       break;
12638     case 'O':  // "O" is the constant zero.
12639       if (Value == 0)
12640         Result = DAG.getTargetConstant(Value, dl, TCVT);
12641       break;
12642     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
12643       if (isInt<16>(-Value))
12644         Result = DAG.getTargetConstant(Value, dl, TCVT);
12645       break;
12646     }
12647     break;
12648   }
12649   }
12650 
12651   if (Result.getNode()) {
12652     Ops.push_back(Result);
12653     return;
12654   }
12655 
12656   // Handle standard constraint letters.
12657   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
12658 }
12659 
12660 // isLegalAddressingMode - Return true if the addressing mode represented
12661 // by AM is legal for this target, for a load/store of the specified type.
12662 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
12663                                               const AddrMode &AM, Type *Ty,
12664                                               unsigned AS) const {
12665   // PPC does not allow r+i addressing modes for vectors!
12666   if (Ty->isVectorTy() && AM.BaseOffs != 0)
12667     return false;
12668 
12669   // PPC allows a sign-extended 16-bit immediate field.
12670   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
12671     return false;
12672 
12673   // No global is ever allowed as a base.
12674   if (AM.BaseGV)
12675     return false;
12676 
12677   // PPC only support r+r,
12678   switch (AM.Scale) {
12679   case 0:  // "r+i" or just "i", depending on HasBaseReg.
12680     break;
12681   case 1:
12682     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
12683       return false;
12684     // Otherwise we have r+r or r+i.
12685     break;
12686   case 2:
12687     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
12688       return false;
12689     // Allow 2*r as r+r.
12690     break;
12691   default:
12692     // No other scales are supported.
12693     return false;
12694   }
12695 
12696   return true;
12697 }
12698 
12699 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
12700                                            SelectionDAG &DAG) const {
12701   MachineFunction &MF = DAG.getMachineFunction();
12702   MachineFrameInfo &MFI = MF.getFrameInfo();
12703   MFI.setReturnAddressIsTaken(true);
12704 
12705   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
12706     return SDValue();
12707 
12708   SDLoc dl(Op);
12709   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12710 
12711   // Make sure the function does not optimize away the store of the RA to
12712   // the stack.
12713   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
12714   FuncInfo->setLRStoreRequired();
12715   bool isPPC64 = Subtarget.isPPC64();
12716   auto PtrVT = getPointerTy(MF.getDataLayout());
12717 
12718   if (Depth > 0) {
12719     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
12720     SDValue Offset =
12721         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
12722                         isPPC64 ? MVT::i64 : MVT::i32);
12723     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
12724                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
12725                        MachinePointerInfo());
12726   }
12727 
12728   // Just load the return address off the stack.
12729   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
12730   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
12731                      MachinePointerInfo());
12732 }
12733 
12734 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
12735                                           SelectionDAG &DAG) const {
12736   SDLoc dl(Op);
12737   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12738 
12739   MachineFunction &MF = DAG.getMachineFunction();
12740   MachineFrameInfo &MFI = MF.getFrameInfo();
12741   MFI.setFrameAddressIsTaken(true);
12742 
12743   EVT PtrVT = getPointerTy(MF.getDataLayout());
12744   bool isPPC64 = PtrVT == MVT::i64;
12745 
12746   // Naked functions never have a frame pointer, and so we use r1. For all
12747   // other functions, this decision must be delayed until during PEI.
12748   unsigned FrameReg;
12749   if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
12750     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
12751   else
12752     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
12753 
12754   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
12755                                          PtrVT);
12756   while (Depth--)
12757     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
12758                             FrameAddr, MachinePointerInfo());
12759   return FrameAddr;
12760 }
12761 
12762 // FIXME? Maybe this could be a TableGen attribute on some registers and
12763 // this table could be generated automatically from RegInfo.
12764 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
12765                                               SelectionDAG &DAG) const {
12766   bool isPPC64 = Subtarget.isPPC64();
12767   bool isDarwinABI = Subtarget.isDarwinABI();
12768 
12769   if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
12770       (!isPPC64 && VT != MVT::i32))
12771     report_fatal_error("Invalid register global variable type");
12772 
12773   bool is64Bit = isPPC64 && VT == MVT::i64;
12774   unsigned Reg = StringSwitch<unsigned>(RegName)
12775                    .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
12776                    .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
12777                    .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
12778                                   (is64Bit ? PPC::X13 : PPC::R13))
12779                    .Default(0);
12780 
12781   if (Reg)
12782     return Reg;
12783   report_fatal_error("Invalid register name global variable");
12784 }
12785 
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // Folding an offset into a global address node is never legal here:
  // the PowerPC target isn't yet aware of offsets.
  return false;
}
12791 
12792 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12793                                            const CallInst &I,
12794                                            unsigned Intrinsic) const {
12795   switch (Intrinsic) {
12796   case Intrinsic::ppc_qpx_qvlfd:
12797   case Intrinsic::ppc_qpx_qvlfs:
12798   case Intrinsic::ppc_qpx_qvlfcd:
12799   case Intrinsic::ppc_qpx_qvlfcs:
12800   case Intrinsic::ppc_qpx_qvlfiwa:
12801   case Intrinsic::ppc_qpx_qvlfiwz:
12802   case Intrinsic::ppc_altivec_lvx:
12803   case Intrinsic::ppc_altivec_lvxl:
12804   case Intrinsic::ppc_altivec_lvebx:
12805   case Intrinsic::ppc_altivec_lvehx:
12806   case Intrinsic::ppc_altivec_lvewx:
12807   case Intrinsic::ppc_vsx_lxvd2x:
12808   case Intrinsic::ppc_vsx_lxvw4x: {
12809     EVT VT;
12810     switch (Intrinsic) {
12811     case Intrinsic::ppc_altivec_lvebx:
12812       VT = MVT::i8;
12813       break;
12814     case Intrinsic::ppc_altivec_lvehx:
12815       VT = MVT::i16;
12816       break;
12817     case Intrinsic::ppc_altivec_lvewx:
12818       VT = MVT::i32;
12819       break;
12820     case Intrinsic::ppc_vsx_lxvd2x:
12821       VT = MVT::v2f64;
12822       break;
12823     case Intrinsic::ppc_qpx_qvlfd:
12824       VT = MVT::v4f64;
12825       break;
12826     case Intrinsic::ppc_qpx_qvlfs:
12827       VT = MVT::v4f32;
12828       break;
12829     case Intrinsic::ppc_qpx_qvlfcd:
12830       VT = MVT::v2f64;
12831       break;
12832     case Intrinsic::ppc_qpx_qvlfcs:
12833       VT = MVT::v2f32;
12834       break;
12835     default:
12836       VT = MVT::v4i32;
12837       break;
12838     }
12839 
12840     Info.opc = ISD::INTRINSIC_W_CHAIN;
12841     Info.memVT = VT;
12842     Info.ptrVal = I.getArgOperand(0);
12843     Info.offset = -VT.getStoreSize()+1;
12844     Info.size = 2*VT.getStoreSize()-1;
12845     Info.align = 1;
12846     Info.vol = false;
12847     Info.readMem = true;
12848     Info.writeMem = false;
12849     return true;
12850   }
12851   case Intrinsic::ppc_qpx_qvlfda:
12852   case Intrinsic::ppc_qpx_qvlfsa:
12853   case Intrinsic::ppc_qpx_qvlfcda:
12854   case Intrinsic::ppc_qpx_qvlfcsa:
12855   case Intrinsic::ppc_qpx_qvlfiwaa:
12856   case Intrinsic::ppc_qpx_qvlfiwza: {
12857     EVT VT;
12858     switch (Intrinsic) {
12859     case Intrinsic::ppc_qpx_qvlfda:
12860       VT = MVT::v4f64;
12861       break;
12862     case Intrinsic::ppc_qpx_qvlfsa:
12863       VT = MVT::v4f32;
12864       break;
12865     case Intrinsic::ppc_qpx_qvlfcda:
12866       VT = MVT::v2f64;
12867       break;
12868     case Intrinsic::ppc_qpx_qvlfcsa:
12869       VT = MVT::v2f32;
12870       break;
12871     default:
12872       VT = MVT::v4i32;
12873       break;
12874     }
12875 
12876     Info.opc = ISD::INTRINSIC_W_CHAIN;
12877     Info.memVT = VT;
12878     Info.ptrVal = I.getArgOperand(0);
12879     Info.offset = 0;
12880     Info.size = VT.getStoreSize();
12881     Info.align = 1;
12882     Info.vol = false;
12883     Info.readMem = true;
12884     Info.writeMem = false;
12885     return true;
12886   }
12887   case Intrinsic::ppc_qpx_qvstfd:
12888   case Intrinsic::ppc_qpx_qvstfs:
12889   case Intrinsic::ppc_qpx_qvstfcd:
12890   case Intrinsic::ppc_qpx_qvstfcs:
12891   case Intrinsic::ppc_qpx_qvstfiw:
12892   case Intrinsic::ppc_altivec_stvx:
12893   case Intrinsic::ppc_altivec_stvxl:
12894   case Intrinsic::ppc_altivec_stvebx:
12895   case Intrinsic::ppc_altivec_stvehx:
12896   case Intrinsic::ppc_altivec_stvewx:
12897   case Intrinsic::ppc_vsx_stxvd2x:
12898   case Intrinsic::ppc_vsx_stxvw4x: {
12899     EVT VT;
12900     switch (Intrinsic) {
12901     case Intrinsic::ppc_altivec_stvebx:
12902       VT = MVT::i8;
12903       break;
12904     case Intrinsic::ppc_altivec_stvehx:
12905       VT = MVT::i16;
12906       break;
12907     case Intrinsic::ppc_altivec_stvewx:
12908       VT = MVT::i32;
12909       break;
12910     case Intrinsic::ppc_vsx_stxvd2x:
12911       VT = MVT::v2f64;
12912       break;
12913     case Intrinsic::ppc_qpx_qvstfd:
12914       VT = MVT::v4f64;
12915       break;
12916     case Intrinsic::ppc_qpx_qvstfs:
12917       VT = MVT::v4f32;
12918       break;
12919     case Intrinsic::ppc_qpx_qvstfcd:
12920       VT = MVT::v2f64;
12921       break;
12922     case Intrinsic::ppc_qpx_qvstfcs:
12923       VT = MVT::v2f32;
12924       break;
12925     default:
12926       VT = MVT::v4i32;
12927       break;
12928     }
12929 
12930     Info.opc = ISD::INTRINSIC_VOID;
12931     Info.memVT = VT;
12932     Info.ptrVal = I.getArgOperand(1);
12933     Info.offset = -VT.getStoreSize()+1;
12934     Info.size = 2*VT.getStoreSize()-1;
12935     Info.align = 1;
12936     Info.vol = false;
12937     Info.readMem = false;
12938     Info.writeMem = true;
12939     return true;
12940   }
12941   case Intrinsic::ppc_qpx_qvstfda:
12942   case Intrinsic::ppc_qpx_qvstfsa:
12943   case Intrinsic::ppc_qpx_qvstfcda:
12944   case Intrinsic::ppc_qpx_qvstfcsa:
12945   case Intrinsic::ppc_qpx_qvstfiwa: {
12946     EVT VT;
12947     switch (Intrinsic) {
12948     case Intrinsic::ppc_qpx_qvstfda:
12949       VT = MVT::v4f64;
12950       break;
12951     case Intrinsic::ppc_qpx_qvstfsa:
12952       VT = MVT::v4f32;
12953       break;
12954     case Intrinsic::ppc_qpx_qvstfcda:
12955       VT = MVT::v2f64;
12956       break;
12957     case Intrinsic::ppc_qpx_qvstfcsa:
12958       VT = MVT::v2f32;
12959       break;
12960     default:
12961       VT = MVT::v4i32;
12962       break;
12963     }
12964 
12965     Info.opc = ISD::INTRINSIC_VOID;
12966     Info.memVT = VT;
12967     Info.ptrVal = I.getArgOperand(1);
12968     Info.offset = 0;
12969     Info.size = VT.getStoreSize();
12970     Info.align = 1;
12971     Info.vol = false;
12972     Info.readMem = false;
12973     Info.writeMem = true;
12974     return true;
12975   }
12976   default:
12977     break;
12978   }
12979 
12980   return false;
12981 }
12982 
12983 /// getOptimalMemOpType - Returns the target specific optimal type for load
12984 /// and store operations as a result of memset, memcpy, and memmove
12985 /// lowering. If DstAlign is zero that means it's safe to destination
12986 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
12987 /// means there isn't a need to check it against alignment requirement,
12988 /// probably because the source does not need to be loaded. If 'IsMemset' is
12989 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
12990 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
12991 /// source is constant so it does not need to be loaded.
12992 /// It returns EVT::Other if the type should be determined using generic
12993 /// target-independent logic.
12994 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
12995                                            unsigned DstAlign, unsigned SrcAlign,
12996                                            bool IsMemset, bool ZeroMemset,
12997                                            bool MemcpyStrSrc,
12998                                            MachineFunction &MF) const {
12999   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
13000     const Function *F = MF.getFunction();
13001     // When expanding a memset, require at least two QPX instructions to cover
13002     // the cost of loading the value to be stored from the constant pool.
13003     if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
13004        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
13005         !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
13006       return MVT::v4f64;
13007     }
13008 
13009     // We should use Altivec/VSX loads and stores when available. For unaligned
13010     // addresses, unaligned VSX loads are only fast starting with the P8.
13011     if (Subtarget.hasAltivec() && Size >= 16 &&
13012         (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
13013          ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
13014       return MVT::v4i32;
13015   }
13016 
13017   if (Subtarget.isPPC64()) {
13018     return MVT::i64;
13019   }
13020 
13021   return MVT::i32;
13022 }
13023 
13024 /// \brief Returns true if it is beneficial to convert a load of a constant
13025 /// to just the constant itself.
13026 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
13027                                                           Type *Ty) const {
13028   assert(Ty->isIntegerTy());
13029 
13030   unsigned BitSize = Ty->getPrimitiveSizeInBits();
13031   return !(BitSize == 0 || BitSize > 64);
13032 }
13033 
13034 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
13035   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
13036     return false;
13037   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
13038   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
13039   return NumBits1 == 64 && NumBits2 == 32;
13040 }
13041 
13042 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
13043   if (!VT1.isInteger() || !VT2.isInteger())
13044     return false;
13045   unsigned NumBits1 = VT1.getSizeInBits();
13046   unsigned NumBits2 = VT2.getSizeInBits();
13047   return NumBits1 == 64 && NumBits2 == 32;
13048 }
13049 
13050 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
13051   // Generally speaking, zexts are not free, but they are free when they can be
13052   // folded with other operations.
13053   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
13054     EVT MemVT = LD->getMemoryVT();
13055     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
13056          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
13057         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
13058          LD->getExtensionType() == ISD::ZEXTLOAD))
13059       return true;
13060   }
13061 
13062   // FIXME: Add other cases...
13063   //  - 32-bit shifts with a zext to i64
13064   //  - zext after ctlz, bswap, etc.
13065   //  - zext after and by a constant mask
13066 
13067   return TargetLowering::isZExtFree(Val, VT2);
13068 }
13069 
bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  // All floating-point extensions are treated as free on this target.
  return true;
}
13074 
13075 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
13076   return isInt<16>(Imm) || isUInt<16>(Imm);
13077 }
13078 
13079 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
13080   return isInt<16>(Imm) || isUInt<16>(Imm);
13081 }
13082 
13083 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
13084                                                        unsigned,
13085                                                        unsigned,
13086                                                        bool *Fast) const {
13087   if (DisablePPCUnaligned)
13088     return false;
13089 
13090   // PowerPC supports unaligned memory access for simple non-vector types.
13091   // Although accessing unaligned addresses is not as efficient as accessing
13092   // aligned addresses, it is generally more efficient than manual expansion,
13093   // and generally only traps for software emulation when crossing page
13094   // boundaries.
13095 
13096   if (!VT.isSimple())
13097     return false;
13098 
13099   if (VT.getSimpleVT().isVector()) {
13100     if (Subtarget.hasVSX()) {
13101       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
13102           VT != MVT::v4f32 && VT != MVT::v4i32)
13103         return false;
13104     } else {
13105       return false;
13106     }
13107   }
13108 
13109   if (VT == MVT::ppcf128)
13110     return false;
13111 
13112   if (Fast)
13113     *Fast = true;
13114 
13115   return true;
13116 }
13117 
13118 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
13119   VT = VT.getScalarType();
13120 
13121   if (!VT.isSimple())
13122     return false;
13123 
13124   switch (VT.getSimpleVT().SimpleTy) {
13125   case MVT::f32:
13126   case MVT::f64:
13127     return true;
13128   default:
13129     break;
13130   }
13131 
13132   return false;
13133 }
13134 
13135 const MCPhysReg *
13136 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
13137   // LR is a callee-save register, but we must treat it as clobbered by any call
13138   // site. Hence we include LR in the scratch registers, which are in turn added
13139   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
13140   // to CTR, which is used by any indirect call.
13141   static const MCPhysReg ScratchRegs[] = {
13142     PPC::X12, PPC::LR8, PPC::CTR8, 0
13143   };
13144 
13145   return ScratchRegs;
13146 }
13147 
13148 unsigned PPCTargetLowering::getExceptionPointerRegister(
13149     const Constant *PersonalityFn) const {
13150   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
13151 }
13152 
13153 unsigned PPCTargetLowering::getExceptionSelectorRegister(
13154     const Constant *PersonalityFn) const {
13155   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
13156 }
13157 
13158 bool
13159 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
13160                      EVT VT , unsigned DefinedValues) const {
13161   if (VT == MVT::v2i64)
13162     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
13163 
13164   if (Subtarget.hasVSX() || Subtarget.hasQPX())
13165     return true;
13166 
13167   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
13168 }
13169 
13170 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
13171   if (DisableILPPref || Subtarget.enableMachineScheduler())
13172     return TargetLowering::getSchedulingPreference(N);
13173 
13174   return Sched::ILP;
13175 }
13176 
// Create a fast instruction selector for the PPC target; delegates to the
// target-specific factory in PPCFastISel.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}
13183 
13184 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
13185   if (Subtarget.isDarwinABI()) return;
13186   if (!Subtarget.isPPC64()) return;
13187 
13188   // Update IsSplitCSR in PPCFunctionInfo
13189   PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
13190   PFI->setIsSplitCSR(true);
13191 }
13192 
13193 void PPCTargetLowering::insertCopiesSplitCSR(
13194   MachineBasicBlock *Entry,
13195   const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
13196   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13197   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
13198   if (!IStart)
13199     return;
13200 
13201   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13202   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
13203   MachineBasicBlock::iterator MBBI = Entry->begin();
13204   for (const MCPhysReg *I = IStart; *I; ++I) {
13205     const TargetRegisterClass *RC = nullptr;
13206     if (PPC::G8RCRegClass.contains(*I))
13207       RC = &PPC::G8RCRegClass;
13208     else if (PPC::F8RCRegClass.contains(*I))
13209       RC = &PPC::F8RCRegClass;
13210     else if (PPC::CRRCRegClass.contains(*I))
13211       RC = &PPC::CRRCRegClass;
13212     else if (PPC::VRRCRegClass.contains(*I))
13213       RC = &PPC::VRRCRegClass;
13214     else
13215       llvm_unreachable("Unexpected register class in CSRsViaCopy!");
13216 
13217     unsigned NewVR = MRI->createVirtualRegister(RC);
13218     // Create copy from CSR to a virtual register.
13219     // FIXME: this currently does not emit CFI pseudo-instructions, it works
13220     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
13221     // nounwind. If we want to generalize this later, we may need to emit
13222     // CFI pseudo-instructions.
13223     assert(Entry->getParent()->getFunction()->hasFnAttribute(
13224              Attribute::NoUnwind) &&
13225            "Function should be nounwind in insertCopiesSplitCSR!");
13226     Entry->addLiveIn(*I);
13227     BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
13228       .addReg(*I);
13229 
13230     // Insert the copy-back instructions right before the terminator
13231     for (auto *Exit : Exits)
13232       BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
13233               TII->get(TargetOpcode::COPY), *I)
13234         .addReg(NewVR);
13235   }
13236 }
13237 
13238 // Override to enable LOAD_STACK_GUARD lowering on Linux.
13239 bool PPCTargetLowering::useLoadStackGuardNode() const {
13240   if (!Subtarget.isTargetLinux())
13241     return TargetLowering::useLoadStackGuardNode();
13242   return true;
13243 }
13244 
13245 // Override to disable global variable loading on Linux.
13246 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
13247   if (!Subtarget.isTargetLinux())
13248     return TargetLowering::insertSSPDeclarations(M);
13249 }
13250 
13251 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
13252   if (!VT.isSimple() || !Subtarget.hasVSX())
13253     return false;
13254 
13255   switch(VT.getSimpleVT().SimpleTy) {
13256   default:
13257     // For FP types that are currently not supported by PPC backend, return
13258     // false. Examples: f16, f80.
13259     return false;
13260   case MVT::f32:
13261   case MVT::f64:
13262   case MVT::ppcf128:
13263     return Imm.isPosZero();
13264   }
13265 }
13266 
13267 // For vector shift operation op, fold
13268 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
13269 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
13270                                   SelectionDAG &DAG) {
13271   SDValue N0 = N->getOperand(0);
13272   SDValue N1 = N->getOperand(1);
13273   EVT VT = N0.getValueType();
13274   unsigned OpSizeInBits = VT.getScalarSizeInBits();
13275   unsigned Opcode = N->getOpcode();
13276   unsigned TargetOpcode;
13277 
13278   switch (Opcode) {
13279   default:
13280     llvm_unreachable("Unexpected shift operation");
13281   case ISD::SHL:
13282     TargetOpcode = PPCISD::SHL;
13283     break;
13284   case ISD::SRL:
13285     TargetOpcode = PPCISD::SRL;
13286     break;
13287   case ISD::SRA:
13288     TargetOpcode = PPCISD::SRA;
13289     break;
13290   }
13291 
13292   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
13293       N1->getOpcode() == ISD::AND)
13294     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
13295       if (Mask->getZExtValue() == OpSizeInBits - 1)
13296         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
13297 
13298   return SDValue();
13299 }
13300 
13301 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
13302   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13303     return Value;
13304 
13305   return SDValue();
13306 }
13307 
13308 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
13309   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13310     return Value;
13311 
13312   return SDValue();
13313 }
13314 
13315 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
13316   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13317     return Value;
13318 
13319   return SDValue();
13320 }
13321