1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetRegisterInfo.h"
56 #include "llvm/CodeGen/ValueTypes.h"
57 #include "llvm/IR/CallSite.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/Module.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/Value.h"
73 #include "llvm/MC/MCContext.h"
74 #include "llvm/MC/MCExpr.h"
75 #include "llvm/MC/MCRegisterInfo.h"
76 #include "llvm/MC/MCSymbolXCOFF.h"
77 #include "llvm/Support/AtomicOrdering.h"
78 #include "llvm/Support/BranchProbability.h"
79 #include "llvm/Support/Casting.h"
80 #include "llvm/Support/CodeGen.h"
81 #include "llvm/Support/CommandLine.h"
82 #include "llvm/Support/Compiler.h"
83 #include "llvm/Support/Debug.h"
84 #include "llvm/Support/ErrorHandling.h"
85 #include "llvm/Support/Format.h"
86 #include "llvm/Support/KnownBits.h"
87 #include "llvm/Support/MachineValueType.h"
88 #include "llvm/Support/MathExtras.h"
89 #include "llvm/Support/raw_ostream.h"
90 #include "llvm/Target/TargetMachine.h"
91 #include "llvm/Target/TargetOptions.h"
92 #include <algorithm>
93 #include <cassert>
94 #include <cstdint>
95 #include <iterator>
96 #include <list>
97 #include <utility>
98 #include <vector>
99 
100 using namespace llvm;
101 
102 #define DEBUG_TYPE "ppc-lowering"
103 
104 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
105 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
106 
107 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
108 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
109 
110 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
111 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
112 
113 static cl::opt<bool> DisableSCO("disable-ppc-sco",
114 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
115 
116 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
117 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
118 
119 static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
120 cl::desc("enable quad precision float support on ppc"), cl::Hidden);
121 
122 STATISTIC(NumTailCalls, "Number of tail calls");
123 STATISTIC(NumSiblingCalls, "Number of sibling calls");
124 
125 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
126 
127 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
128 
129 // FIXME: Remove this once the bug has been fixed!
130 extern cl::opt<bool> ANDIGlueBug;
131 
132 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
133                                      const PPCSubtarget &STI)
134     : TargetLowering(TM), Subtarget(STI) {
135   // Use _setjmp/_longjmp instead of setjmp/longjmp.
136   setUseUnderscoreSetJmp(true);
137   setUseUnderscoreLongJmp(true);
138 
139   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
140   // arguments are at least 4/8 bytes aligned.
141   bool isPPC64 = Subtarget.isPPC64();
142   setMinStackArgumentAlignment(isPPC64 ? 8:4);
143 
144   // Set up the register classes.
145   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
146   if (!useSoftFloat()) {
147     if (hasSPE()) {
148       addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
149       addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
150     } else {
151       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
152       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
153     }
154   }
155 
156   // Match BITREVERSE to customized fast code sequence in the td file.
157   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
158   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
159 
160   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
161   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
162 
163   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
164   for (MVT VT : MVT::integer_valuetypes()) {
165     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
166     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
167   }
168 
169   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
170 
  // PowerPC has pre-increment loads and stores.
172   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
173   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
174   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
175   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
176   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
177   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
178   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
179   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
180   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
181   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
182   if (!Subtarget.hasSPE()) {
183     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
184     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
185     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
186     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
187   }
188 
189   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
190   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
191   for (MVT VT : ScalarIntVTs) {
192     setOperationAction(ISD::ADDC, VT, Legal);
193     setOperationAction(ISD::ADDE, VT, Legal);
194     setOperationAction(ISD::SUBC, VT, Legal);
195     setOperationAction(ISD::SUBE, VT, Legal);
196   }
197 
198   if (Subtarget.useCRBits()) {
199     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
200 
201     if (isPPC64 || Subtarget.hasFPCVT()) {
202       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
203       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
204                          isPPC64 ? MVT::i64 : MVT::i32);
205       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
206       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
207                         isPPC64 ? MVT::i64 : MVT::i32);
208     } else {
209       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
210       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
211     }
212 
213     // PowerPC does not support direct load/store of condition registers.
214     setOperationAction(ISD::LOAD, MVT::i1, Custom);
215     setOperationAction(ISD::STORE, MVT::i1, Custom);
216 
217     // FIXME: Remove this once the ANDI glue bug is fixed:
218     if (ANDIGlueBug)
219       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
220 
221     for (MVT VT : MVT::integer_valuetypes()) {
222       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
223       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
224       setTruncStoreAction(VT, MVT::i1, Expand);
225     }
226 
227     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
228   }
229 
230   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
231   // PPC (the libcall is not available).
232   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
233   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
234 
235   // We do not currently implement these libm ops for PowerPC.
236   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
237   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
238   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
239   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
240   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
241   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
242 
243   // PowerPC has no SREM/UREM instructions unless we are on P9
244   // On P9 we may use a hardware instruction to compute the remainder.
245   // The instructions are not legalized directly because in the cases where the
246   // result of both the remainder and the division is required it is more
247   // efficient to compute the remainder from the result of the division rather
248   // than use the remainder instruction.
249   if (Subtarget.isISA3_0()) {
250     setOperationAction(ISD::SREM, MVT::i32, Custom);
251     setOperationAction(ISD::UREM, MVT::i32, Custom);
252     setOperationAction(ISD::SREM, MVT::i64, Custom);
253     setOperationAction(ISD::UREM, MVT::i64, Custom);
254   } else {
255     setOperationAction(ISD::SREM, MVT::i32, Expand);
256     setOperationAction(ISD::UREM, MVT::i32, Expand);
257     setOperationAction(ISD::SREM, MVT::i64, Expand);
258     setOperationAction(ISD::UREM, MVT::i64, Expand);
259   }
260 
261   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
262   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
263   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
264   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
265   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
266   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
267   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
268   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
269   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
270 
271   // We don't support sin/cos/sqrt/fmod/pow
272   setOperationAction(ISD::FSIN , MVT::f64, Expand);
273   setOperationAction(ISD::FCOS , MVT::f64, Expand);
274   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
275   setOperationAction(ISD::FREM , MVT::f64, Expand);
276   setOperationAction(ISD::FPOW , MVT::f64, Expand);
277   setOperationAction(ISD::FSIN , MVT::f32, Expand);
278   setOperationAction(ISD::FCOS , MVT::f32, Expand);
279   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
280   setOperationAction(ISD::FREM , MVT::f32, Expand);
281   setOperationAction(ISD::FPOW , MVT::f32, Expand);
282   if (Subtarget.hasSPE()) {
283     setOperationAction(ISD::FMA  , MVT::f64, Expand);
284     setOperationAction(ISD::FMA  , MVT::f32, Expand);
285   } else {
286     setOperationAction(ISD::FMA  , MVT::f64, Legal);
287     setOperationAction(ISD::FMA  , MVT::f32, Legal);
288   }
289 
290   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
291 
  // Expand FSQRT when there is no hardware square root, unless unsafe FP math
  // is enabled and the subtarget has FRSQRTE/FRE (f64) or FRSQRTES/FRES (f32).
293   if (!Subtarget.hasFSQRT() &&
294       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
295         Subtarget.hasFRE()))
296     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
297 
298   if (!Subtarget.hasFSQRT() &&
299       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
300         Subtarget.hasFRES()))
301     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
302 
303   if (Subtarget.hasFCPSGN()) {
304     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
305     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
306   } else {
307     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
308     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
309   }
310 
311   if (Subtarget.hasFPRND()) {
312     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
313     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
314     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
315     setOperationAction(ISD::FROUND, MVT::f64, Legal);
316 
317     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
318     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
319     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
320     setOperationAction(ISD::FROUND, MVT::f32, Legal);
321   }
322 
323   // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
324   // to speed up scalar BSWAP64.
325   // CTPOP or CTTZ were introduced in P8/P9 respectively
326   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
327   if (Subtarget.hasP9Vector())
328     setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
329   else
330     setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
331   if (Subtarget.isISA3_0()) {
332     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
333     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
334   } else {
335     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
336     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
337   }
338 
339   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
340     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
341     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
342   } else {
343     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
344     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
345   }
346 
347   // PowerPC does not have ROTR
348   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
349   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
350 
351   if (!Subtarget.useCRBits()) {
352     // PowerPC does not have Select
353     setOperationAction(ISD::SELECT, MVT::i32, Expand);
354     setOperationAction(ISD::SELECT, MVT::i64, Expand);
355     setOperationAction(ISD::SELECT, MVT::f32, Expand);
356     setOperationAction(ISD::SELECT, MVT::f64, Expand);
357   }
358 
359   // PowerPC wants to turn select_cc of FP into fsel when possible.
360   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
361   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
362 
363   // PowerPC wants to optimize integer setcc a bit
364   if (!Subtarget.useCRBits())
365     setOperationAction(ISD::SETCC, MVT::i32, Custom);
366 
367   // PowerPC does not have BRCOND which requires SetCC
368   if (!Subtarget.useCRBits())
369     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
370 
371   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
372 
373   if (Subtarget.hasSPE()) {
374     // SPE has built-in conversions
375     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
376     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
377     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
378   } else {
379     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
380     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
381 
382     // PowerPC does not have [U|S]INT_TO_FP
383     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
384     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
385   }
386 
387   if (Subtarget.hasDirectMove() && isPPC64) {
388     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
389     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
390     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
391     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
392   } else {
393     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
394     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
395     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
396     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
397   }
398 
399   // We cannot sextinreg(i1).  Expand to shifts.
400   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
401 
402   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
403   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, etc. As a result, no
405   // other SjLj exception interfaces are implemented and please don't build
406   // your own exception handling based on them.
407   // LLVM/Clang supports zero-cost DWARF exception handling.
408   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
409   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
410 
411   // We want to legalize GlobalAddress and ConstantPool nodes into the
412   // appropriate instructions to materialize the address.
413   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
414   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
415   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
416   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
417   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
418   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
419   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
420   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
421   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
422   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
423 
424   // TRAP is legal.
425   setOperationAction(ISD::TRAP, MVT::Other, Legal);
426 
427   // TRAMPOLINE is custom lowered.
428   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
429   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
430 
431   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
432   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
433 
434   if (Subtarget.isSVR4ABI()) {
435     if (isPPC64) {
436       // VAARG always uses double-word chunks, so promote anything smaller.
437       setOperationAction(ISD::VAARG, MVT::i1, Promote);
438       AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
439       setOperationAction(ISD::VAARG, MVT::i8, Promote);
440       AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
441       setOperationAction(ISD::VAARG, MVT::i16, Promote);
442       AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
443       setOperationAction(ISD::VAARG, MVT::i32, Promote);
444       AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
445       setOperationAction(ISD::VAARG, MVT::Other, Expand);
446     } else {
447       // VAARG is custom lowered with the 32-bit SVR4 ABI.
448       setOperationAction(ISD::VAARG, MVT::Other, Custom);
449       setOperationAction(ISD::VAARG, MVT::i64, Custom);
450     }
451   } else
452     setOperationAction(ISD::VAARG, MVT::Other, Expand);
453 
454   if (Subtarget.isSVR4ABI() && !isPPC64)
455     // VACOPY is custom lowered with the 32-bit SVR4 ABI.
456     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
457   else
458     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
459 
  // Use the default expansion for VAEND and STACKSAVE; the remaining
  // stack-related nodes below are custom lowered.
461   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
462   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
463   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
464   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
465   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
466   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
467   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
468   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
469   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
470 
471   // We want to custom lower some of our intrinsics.
472   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
473 
474   // To handle counter-based loop conditions.
475   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
476 
477   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
478   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
479   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
480   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
481 
482   // Comparisons that require checking two conditions.
483   if (Subtarget.hasSPE()) {
484     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
485     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
486     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
487     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
488   }
489   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
490   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
491   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
492   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
493   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
494   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
495   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
496   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
497   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
498   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
499   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
500   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
501 
502   if (Subtarget.has64BitSupport()) {
503     // They also have instructions for converting between i64 and fp.
504     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
505     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
506     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
507     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
508     // This is just the low 32 bits of a (signed) fp->i64 conversion.
509     // We cannot do this with Promote because i64 is not a legal type.
510     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
511 
512     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
513       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
514   } else {
515     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
516     if (Subtarget.hasSPE())
517       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
518     else
519       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
520   }
521 
522   // With the instructions enabled under FPCVT, we can do everything.
523   if (Subtarget.hasFPCVT()) {
524     if (Subtarget.has64BitSupport()) {
525       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
526       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
527       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
528       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
529     }
530 
531     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
532     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
533     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
534     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
535   }
536 
537   if (Subtarget.use64BitRegs()) {
538     // 64-bit PowerPC implementations can support i64 types directly
539     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
540     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
541     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
542     // 64-bit PowerPC wants to expand i128 shifts itself.
543     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
544     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
545     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
546   } else {
547     // 32-bit PowerPC wants to expand i64 shifts itself.
548     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
549     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
550     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
551   }
552 
553   if (Subtarget.hasAltivec()) {
554     // First set operation action for all vector types to expand. Then we
555     // will selectively turn on ones that can be effectively codegen'd.
556     for (MVT VT : MVT::vector_valuetypes()) {
557       // add/sub are legal for all supported vector VT's.
558       setOperationAction(ISD::ADD, VT, Legal);
559       setOperationAction(ISD::SUB, VT, Legal);
560 
561       // For v2i64, these are only valid with P8Vector. This is corrected after
562       // the loop.
563       setOperationAction(ISD::SMAX, VT, Legal);
564       setOperationAction(ISD::SMIN, VT, Legal);
565       setOperationAction(ISD::UMAX, VT, Legal);
566       setOperationAction(ISD::UMIN, VT, Legal);
567 
568       if (Subtarget.hasVSX()) {
569         setOperationAction(ISD::FMAXNUM, VT, Legal);
570         setOperationAction(ISD::FMINNUM, VT, Legal);
571       }
572 
573       // Vector instructions introduced in P8
574       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
575         setOperationAction(ISD::CTPOP, VT, Legal);
576         setOperationAction(ISD::CTLZ, VT, Legal);
577       }
578       else {
579         setOperationAction(ISD::CTPOP, VT, Expand);
580         setOperationAction(ISD::CTLZ, VT, Expand);
581       }
582 
583       // Vector instructions introduced in P9
584       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
585         setOperationAction(ISD::CTTZ, VT, Legal);
586       else
587         setOperationAction(ISD::CTTZ, VT, Expand);
588 
589       // We promote all shuffles to v16i8.
590       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
591       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
592 
593       // We promote all non-typed operations to v4i32.
594       setOperationAction(ISD::AND   , VT, Promote);
595       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
596       setOperationAction(ISD::OR    , VT, Promote);
597       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
598       setOperationAction(ISD::XOR   , VT, Promote);
599       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
600       setOperationAction(ISD::LOAD  , VT, Promote);
601       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
602       setOperationAction(ISD::SELECT, VT, Promote);
603       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
604       setOperationAction(ISD::VSELECT, VT, Legal);
605       setOperationAction(ISD::SELECT_CC, VT, Promote);
606       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
607       setOperationAction(ISD::STORE, VT, Promote);
608       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
609 
610       // No other operations are legal.
611       setOperationAction(ISD::MUL , VT, Expand);
612       setOperationAction(ISD::SDIV, VT, Expand);
613       setOperationAction(ISD::SREM, VT, Expand);
614       setOperationAction(ISD::UDIV, VT, Expand);
615       setOperationAction(ISD::UREM, VT, Expand);
616       setOperationAction(ISD::FDIV, VT, Expand);
617       setOperationAction(ISD::FREM, VT, Expand);
618       setOperationAction(ISD::FNEG, VT, Expand);
619       setOperationAction(ISD::FSQRT, VT, Expand);
620       setOperationAction(ISD::FLOG, VT, Expand);
621       setOperationAction(ISD::FLOG10, VT, Expand);
622       setOperationAction(ISD::FLOG2, VT, Expand);
623       setOperationAction(ISD::FEXP, VT, Expand);
624       setOperationAction(ISD::FEXP2, VT, Expand);
625       setOperationAction(ISD::FSIN, VT, Expand);
626       setOperationAction(ISD::FCOS, VT, Expand);
627       setOperationAction(ISD::FABS, VT, Expand);
628       setOperationAction(ISD::FFLOOR, VT, Expand);
629       setOperationAction(ISD::FCEIL,  VT, Expand);
630       setOperationAction(ISD::FTRUNC, VT, Expand);
631       setOperationAction(ISD::FRINT,  VT, Expand);
632       setOperationAction(ISD::FNEARBYINT, VT, Expand);
633       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
634       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
635       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
636       setOperationAction(ISD::MULHU, VT, Expand);
637       setOperationAction(ISD::MULHS, VT, Expand);
638       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
639       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
640       setOperationAction(ISD::UDIVREM, VT, Expand);
641       setOperationAction(ISD::SDIVREM, VT, Expand);
642       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
643       setOperationAction(ISD::FPOW, VT, Expand);
644       setOperationAction(ISD::BSWAP, VT, Expand);
645       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
646       setOperationAction(ISD::ROTL, VT, Expand);
647       setOperationAction(ISD::ROTR, VT, Expand);
648 
649       for (MVT InnerVT : MVT::vector_valuetypes()) {
650         setTruncStoreAction(VT, InnerVT, Expand);
651         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
652         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
653         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
654       }
655     }
656     if (!Subtarget.hasP8Vector()) {
657       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
658       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
659       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
660       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
661     }
662 
663     for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
664       setOperationAction(ISD::ABS, VT, Custom);
665 
666     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
667     // with merges, splats, etc.
668     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
669 
670     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
671     // are cheap, so handle them before they get expanded to scalar.
672     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
673     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
674     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
675     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
676     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
677 
678     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
679     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
680     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
681     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
682     setOperationAction(ISD::SELECT, MVT::v4i32,
683                        Subtarget.useCRBits() ? Legal : Expand);
684     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
685     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
686     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
687     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
688     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
689     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
690     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
691     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
692     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
693 
694     // Without hasP8Altivec set, v2i64 SMAX isn't available.
695     // But ABS custom lowering requires SMAX support.
696     if (!Subtarget.hasP8Altivec())
697       setOperationAction(ISD::ABS, MVT::v2i64, Expand);
698 
699     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
700     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
701     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
702     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
703 
704     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
705     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
706 
707     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
708       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
709       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
710     }
711 
712     if (Subtarget.hasP8Altivec())
713       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
714     else
715       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
716 
717     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
718     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
719 
720     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
721     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
722 
723     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
724     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
725     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
726     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
727 
728     // Altivec does not contain unordered floating-point compare instructions
729     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
730     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
731     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
732     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
733 
734     if (Subtarget.hasVSX()) {
735       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
736       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
737       if (Subtarget.hasP8Vector()) {
738         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
739         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
740       }
741       if (Subtarget.hasDirectMove() && isPPC64) {
742         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
743         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
744         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
745         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
746         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
747         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
748         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
749         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
750       }
751       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
752 
753       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
754       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
755       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
756       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
757       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
758 
759       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
760 
761       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
762       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
763 
764       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
765       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
766 
767       // Share the Altivec comparison restrictions.
768       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
769       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
770       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
771       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
772 
773       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
774       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
775 
776       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
777 
778       if (Subtarget.hasP8Vector())
779         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
780 
781       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
782 
783       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
784       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
785       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
786 
787       if (Subtarget.hasP8Altivec()) {
788         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
789         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
790         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
791 
792         // 128 bit shifts can be accomplished via 3 instructions for SHL and
793         // SRL, but not for SRA because of the instructions available:
794         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
795         // doing
796         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
797         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
798         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
799 
800         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
801       }
802       else {
803         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
804         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
805         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
806 
807         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
808 
809         // VSX v2i64 only supports non-arithmetic operations.
810         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
811         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
812       }
813 
814       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
815       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
816       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
817       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
818 
819       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
820 
821       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
822       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
823       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
824       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
825 
826       // Custom handling for partial vectors of integers converted to
827       // floating point. We already have optimal handling for v2i32 through
828       // the DAG combine, so those aren't necessary.
829       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
830       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
831       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
832       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
833       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
834       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
835       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
836       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
837 
838       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
839       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
840       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
841       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
842       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
843       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
844 
845       if (Subtarget.hasDirectMove())
846         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
847       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
848 
849       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
850     }
851 
852     if (Subtarget.hasP8Altivec()) {
853       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
854       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
855     }
856 
857     if (Subtarget.hasP9Vector()) {
858       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
859       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
860 
861       // 128 bit shifts can be accomplished via 3 instructions for SHL and
862       // SRL, but not for SRA because of the instructions available:
863       // VS{RL} and VS{RL}O.
864       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
865       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
866       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
867 
868       if (EnableQuadPrecision) {
869         addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
870         setOperationAction(ISD::FADD, MVT::f128, Legal);
871         setOperationAction(ISD::FSUB, MVT::f128, Legal);
872         setOperationAction(ISD::FDIV, MVT::f128, Legal);
873         setOperationAction(ISD::FMUL, MVT::f128, Legal);
874         setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
875         // No extending loads to f128 on PPC.
876         for (MVT FPT : MVT::fp_valuetypes())
877           setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
878         setOperationAction(ISD::FMA, MVT::f128, Legal);
879         setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
880         setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
881         setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
882         setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
883         setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
884         setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
885 
886         setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
887         setOperationAction(ISD::FRINT, MVT::f128, Legal);
888         setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
889         setOperationAction(ISD::FCEIL, MVT::f128, Legal);
890         setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
891         setOperationAction(ISD::FROUND, MVT::f128, Legal);
892 
893         setOperationAction(ISD::SELECT, MVT::f128, Expand);
894         setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
895         setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
896         setTruncStoreAction(MVT::f128, MVT::f64, Expand);
897         setTruncStoreAction(MVT::f128, MVT::f32, Expand);
898         setOperationAction(ISD::BITCAST, MVT::i128, Custom);
899         // No implementation for these ops for PowerPC.
900         setOperationAction(ISD::FSIN , MVT::f128, Expand);
901         setOperationAction(ISD::FCOS , MVT::f128, Expand);
902         setOperationAction(ISD::FPOW, MVT::f128, Expand);
903         setOperationAction(ISD::FPOWI, MVT::f128, Expand);
904         setOperationAction(ISD::FREM, MVT::f128, Expand);
905       }
906       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
907 
908     }
909 
910     if (Subtarget.hasP9Altivec()) {
911       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
912       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
913     }
914   }
915 
916   if (Subtarget.hasQPX()) {
917     setOperationAction(ISD::FADD, MVT::v4f64, Legal);
918     setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
919     setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
920     setOperationAction(ISD::FREM, MVT::v4f64, Expand);
921 
922     setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
923     setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
924 
925     setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
926     setOperationAction(ISD::STORE , MVT::v4f64, Custom);
927 
928     setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
929     setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
930 
931     if (!Subtarget.useCRBits())
932       setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
933     setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
934 
935     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
936     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
937     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
938     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
939     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
940     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
941     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
942 
943     setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
944     setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
945 
946     setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
947     setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
948     setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
949 
950     setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
951     setOperationAction(ISD::FABS , MVT::v4f64, Legal);
952     setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
953     setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
954     setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
955     setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
956     setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
957     setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
958     setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
959     setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
960 
961     setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
962     setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
963 
964     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
965     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
966 
967     addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
968 
969     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
970     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
971     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
972     setOperationAction(ISD::FREM, MVT::v4f32, Expand);
973 
974     setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
975     setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
976 
977     setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
978     setOperationAction(ISD::STORE , MVT::v4f32, Custom);
979 
980     if (!Subtarget.useCRBits())
981       setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
982     setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
983 
984     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
985     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
986     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
987     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
988     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
989     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
990     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
991 
992     setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
993     setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
994 
995     setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
996     setOperationAction(ISD::FABS , MVT::v4f32, Legal);
997     setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
998     setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
999     setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
1000     setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
1001     setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
1002     setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
1003     setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
1004     setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
1005 
1006     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1007     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1008 
1009     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
1010     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
1011 
1012     addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
1013 
1014     setOperationAction(ISD::AND , MVT::v4i1, Legal);
1015     setOperationAction(ISD::OR , MVT::v4i1, Legal);
1016     setOperationAction(ISD::XOR , MVT::v4i1, Legal);
1017 
1018     if (!Subtarget.useCRBits())
1019       setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
1020     setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
1021 
1022     setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
1023     setOperationAction(ISD::STORE , MVT::v4i1, Custom);
1024 
1025     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
1026     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
1027     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
1028     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
1029     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
1030     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
1031     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
1032 
1033     setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
1034     setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
1035 
1036     addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
1037 
1038     setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1039     setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
1040     setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1041     setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
1042 
1043     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
1044     setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
1045     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
1046     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1047 
1048     setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
1049     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
1050 
1051     // These need to set FE_INEXACT, and so cannot be vectorized here.
1052     setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
1053     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
1054 
1055     if (TM.Options.UnsafeFPMath) {
1056       setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1057       setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1058 
1059       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
1060       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
1061     } else {
1062       setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
1063       setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
1064 
1065       setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
1066       setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
1067     }
1068   }
1069 
1070   if (Subtarget.has64BitSupport())
1071     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1072 
1073   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1074 
1075   if (!isPPC64) {
1076     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1077     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1078   }
1079 
1080   setBooleanContents(ZeroOrOneBooleanContent);
1081 
1082   if (Subtarget.hasAltivec()) {
1083     // Altivec instructions set fields to all zeros or all ones.
1084     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1085   }
1086 
1087   if (!isPPC64) {
1088     // These libcalls are not available in 32-bit.
1089     setLibcallName(RTLIB::SHL_I128, nullptr);
1090     setLibcallName(RTLIB::SRL_I128, nullptr);
1091     setLibcallName(RTLIB::SRA_I128, nullptr);
1092   }
1093 
1094   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1095 
1096   // We have target-specific dag combine patterns for the following nodes:
1097   setTargetDAGCombine(ISD::ADD);
1098   setTargetDAGCombine(ISD::SHL);
1099   setTargetDAGCombine(ISD::SRA);
1100   setTargetDAGCombine(ISD::SRL);
1101   setTargetDAGCombine(ISD::MUL);
1102   setTargetDAGCombine(ISD::SINT_TO_FP);
1103   setTargetDAGCombine(ISD::BUILD_VECTOR);
1104   if (Subtarget.hasFPCVT())
1105     setTargetDAGCombine(ISD::UINT_TO_FP);
1106   setTargetDAGCombine(ISD::LOAD);
1107   setTargetDAGCombine(ISD::STORE);
1108   setTargetDAGCombine(ISD::BR_CC);
1109   if (Subtarget.useCRBits())
1110     setTargetDAGCombine(ISD::BRCOND);
1111   setTargetDAGCombine(ISD::BSWAP);
1112   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1113   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1114   setTargetDAGCombine(ISD::INTRINSIC_VOID);
1115 
1116   setTargetDAGCombine(ISD::SIGN_EXTEND);
1117   setTargetDAGCombine(ISD::ZERO_EXTEND);
1118   setTargetDAGCombine(ISD::ANY_EXTEND);
1119 
1120   setTargetDAGCombine(ISD::TRUNCATE);
1121 
1122   if (Subtarget.useCRBits()) {
1123     setTargetDAGCombine(ISD::TRUNCATE);
1124     setTargetDAGCombine(ISD::SETCC);
1125     setTargetDAGCombine(ISD::SELECT_CC);
1126   }
1127 
1128   // Use reciprocal estimates.
1129   if (TM.Options.UnsafeFPMath) {
1130     setTargetDAGCombine(ISD::FDIV);
1131     setTargetDAGCombine(ISD::FSQRT);
1132   }
1133 
1134   if (Subtarget.hasP9Altivec()) {
1135     setTargetDAGCombine(ISD::ABS);
1136     setTargetDAGCombine(ISD::VSELECT);
1137   }
1138 
1139   // Darwin long double math library functions have $LDBL128 appended.
1140   if (Subtarget.isDarwin()) {
1141     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
1142     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
1143     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
1144     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
1145     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
1146     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
1147     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1148     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1149     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1150     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1151   }
1152 
1153   if (EnableQuadPrecision) {
1154     setLibcallName(RTLIB::LOG_F128, "logf128");
1155     setLibcallName(RTLIB::LOG2_F128, "log2f128");
1156     setLibcallName(RTLIB::LOG10_F128, "log10f128");
1157     setLibcallName(RTLIB::EXP_F128, "expf128");
1158     setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1159     setLibcallName(RTLIB::SIN_F128, "sinf128");
1160     setLibcallName(RTLIB::COS_F128, "cosf128");
1161     setLibcallName(RTLIB::POW_F128, "powf128");
1162     setLibcallName(RTLIB::FMIN_F128, "fminf128");
1163     setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1164     setLibcallName(RTLIB::POWI_F128, "__powikf2");
1165     setLibcallName(RTLIB::REM_F128, "fmodf128");
1166   }
1167 
1168   // With 32 condition bits, we don't need to sink (and duplicate) compares
1169   // aggressively in CodeGenPrep.
1170   if (Subtarget.useCRBits()) {
1171     setHasMultipleConditionRegisters();
1172     setJumpIsExpensive();
1173   }
1174 
1175   setMinFunctionAlignment(2);
1176   if (Subtarget.isDarwin())
1177     setPrefFunctionAlignment(4);
1178 
1179   switch (Subtarget.getDarwinDirective()) {
1180   default: break;
1181   case PPC::DIR_970:
1182   case PPC::DIR_A2:
1183   case PPC::DIR_E500:
1184   case PPC::DIR_E500mc:
1185   case PPC::DIR_E5500:
1186   case PPC::DIR_PWR4:
1187   case PPC::DIR_PWR5:
1188   case PPC::DIR_PWR5X:
1189   case PPC::DIR_PWR6:
1190   case PPC::DIR_PWR6X:
1191   case PPC::DIR_PWR7:
1192   case PPC::DIR_PWR8:
1193   case PPC::DIR_PWR9:
1194     setPrefFunctionAlignment(4);
1195     setPrefLoopAlignment(4);
1196     break;
1197   }
1198 
1199   if (Subtarget.enableMachineScheduler())
1200     setSchedulingPreference(Sched::Source);
1201   else
1202     setSchedulingPreference(Sched::Hybrid);
1203 
1204   computeRegisterProperties(STI.getRegisterInfo());
1205 
1206   // The Freescale cores do better with aggressive inlining of memcpy and
1207   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1208   if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1209       Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1210     MaxStoresPerMemset = 32;
1211     MaxStoresPerMemsetOptSize = 16;
1212     MaxStoresPerMemcpy = 32;
1213     MaxStoresPerMemcpyOptSize = 8;
1214     MaxStoresPerMemmove = 32;
1215     MaxStoresPerMemmoveOptSize = 8;
1216   } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
1220     MaxStoresPerMemset = 128;
1221     MaxStoresPerMemcpy = 128;
1222     MaxStoresPerMemmove = 128;
1223     MaxLoadsPerMemcmp = 128;
1224   } else {
1225     MaxLoadsPerMemcmp = 8;
1226     MaxLoadsPerMemcmpOptSize = 4;
1227   }
1228 }
1229 
1230 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1231 /// the desired ByVal argument alignment.
1232 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1233                              unsigned MaxMaxAlign) {
1234   if (MaxAlign == MaxMaxAlign)
1235     return;
1236   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1237     if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1238       MaxAlign = 32;
1239     else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1240       MaxAlign = 16;
1241   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1242     unsigned EltAlign = 0;
1243     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1244     if (EltAlign > MaxAlign)
1245       MaxAlign = EltAlign;
1246   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1247     for (auto *EltTy : STy->elements()) {
1248       unsigned EltAlign = 0;
1249       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1250       if (EltAlign > MaxAlign)
1251         MaxAlign = EltAlign;
1252       if (MaxAlign == MaxMaxAlign)
1253         break;
1254     }
1255   }
1256 }
1257 
1258 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1259 /// function arguments in the caller parameter area.
1260 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1261                                                   const DataLayout &DL) const {
1262   // Darwin passes everything on 4 byte boundary.
1263   if (Subtarget.isDarwin())
1264     return 4;
1265 
1266   // 16byte and wider vectors are passed on 16byte boundary.
1267   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1268   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1269   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1270     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1271   return Align;
1272 }
1273 
1274 bool PPCTargetLowering::useSoftFloat() const {
1275   return Subtarget.useSoftFloat();
1276 }
1277 
1278 bool PPCTargetLowering::hasSPE() const {
1279   return Subtarget.hasSPE();
1280 }
1281 
1282 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1283   switch ((PPCISD::NodeType)Opcode) {
1284   case PPCISD::FIRST_NUMBER:    break;
1285   case PPCISD::FSEL:            return "PPCISD::FSEL";
1286   case PPCISD::FCFID:           return "PPCISD::FCFID";
1287   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1288   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1289   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1290   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1291   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1292   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1293   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1294   case PPCISD::FP_TO_UINT_IN_VSR:
1295                                 return "PPCISD::FP_TO_UINT_IN_VSR,";
1296   case PPCISD::FP_TO_SINT_IN_VSR:
1297                                 return "PPCISD::FP_TO_SINT_IN_VSR";
1298   case PPCISD::FRE:             return "PPCISD::FRE";
1299   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1300   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1301   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
1302   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
1303   case PPCISD::VPERM:           return "PPCISD::VPERM";
1304   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1305   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1306   case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
1307   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1308   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1309   case PPCISD::CMPB:            return "PPCISD::CMPB";
1310   case PPCISD::Hi:              return "PPCISD::Hi";
1311   case PPCISD::Lo:              return "PPCISD::Lo";
1312   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1313   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1314   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1315   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1316   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1317   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1318   case PPCISD::SRL:             return "PPCISD::SRL";
1319   case PPCISD::SRA:             return "PPCISD::SRA";
1320   case PPCISD::SHL:             return "PPCISD::SHL";
1321   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1322   case PPCISD::CALL:            return "PPCISD::CALL";
1323   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1324   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1325   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1326   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1327   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1328   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1329   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1330   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1331   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1332   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1333   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1334   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1335   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1336   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1337   case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
1338   case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
1339   case PPCISD::VCMP:            return "PPCISD::VCMP";
1340   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
1341   case PPCISD::LBRX:            return "PPCISD::LBRX";
1342   case PPCISD::STBRX:           return "PPCISD::STBRX";
1343   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1344   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1345   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1346   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1347   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1348   case PPCISD::SExtVElems:      return "PPCISD::SExtVElems";
1349   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1350   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1351   case PPCISD::ST_VSR_SCAL_INT:
1352                                 return "PPCISD::ST_VSR_SCAL_INT";
1353   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1354   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1355   case PPCISD::BDZ:             return "PPCISD::BDZ";
1356   case PPCISD::MFFS:            return "PPCISD::MFFS";
1357   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1358   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1359   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1360   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1361   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1362   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1363   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1364   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1365   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1366   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1367   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1368   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1369   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1370   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1371   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1372   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1373   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1374   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1375   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1376   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1377   case PPCISD::SC:              return "PPCISD::SC";
1378   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1379   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1380   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1381   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1382   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1383   case PPCISD::VABSD:           return "PPCISD::VABSD";
1384   case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
1385   case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
1386   case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
1387   case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
1388   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
1389   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
1390   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1391   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1392   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1393   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1394   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1395   case PPCISD::FP_EXTEND_LH:    return "PPCISD::FP_EXTEND_LH";
1396   }
1397   return nullptr;
1398 }
1399 
1400 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1401                                           EVT VT) const {
1402   if (!VT.isVector())
1403     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1404 
1405   if (Subtarget.hasQPX())
1406     return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1407 
1408   return VT.changeVectorElementTypeToInteger();
1409 }
1410 
/// Report that aggressive FMA fusion is enabled: this returns true for every
/// type it is asked about.  \p VT is only inspected by the assertion, which
/// requires it to be a floating-point type.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
1415 
1416 //===----------------------------------------------------------------------===//
1417 // Node matching predicates, for use by the tblgen matching code.
1418 //===----------------------------------------------------------------------===//
1419 
1420 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1421 static bool isFloatingPointZero(SDValue Op) {
1422   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1423     return CFP->getValueAPF().isZero();
1424   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1425     // Maybe this has already been legalized into the constant pool?
1426     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1427       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1428         return CFP->getValueAPF().isZero();
1429   }
1430   return false;
1431 }
1432 
/// isConstantOrUndef - Op is a shuffle-mask entry passed as an int, where any
/// negative value stands for undef.  Return true if Op is undef or if it is
/// equal to Val.
static bool isConstantOrUndef(int Op, int Val) {
  if (Op < 0)
    return true; // undef matches anything
  return Op == Val;
}
1438 
1439 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1440 /// VPKUHUM instruction.
1441 /// The ShuffleKind distinguishes between big-endian operations with
1442 /// two different inputs (0), either-endian operations with two identical
1443 /// inputs (1), and little-endian operations with two different inputs (2).
1444 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1445 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1446                                SelectionDAG &DAG) {
1447   bool IsLE = DAG.getDataLayout().isLittleEndian();
1448   if (ShuffleKind == 0) {
1449     if (IsLE)
1450       return false;
1451     for (unsigned i = 0; i != 16; ++i)
1452       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1453         return false;
1454   } else if (ShuffleKind == 2) {
1455     if (!IsLE)
1456       return false;
1457     for (unsigned i = 0; i != 16; ++i)
1458       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1459         return false;
1460   } else if (ShuffleKind == 1) {
1461     unsigned j = IsLE ? 0 : 1;
1462     for (unsigned i = 0; i != 8; ++i)
1463       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1464           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1465         return false;
1466   }
1467   return true;
1468 }
1469 
1470 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1471 /// VPKUWUM instruction.
1472 /// The ShuffleKind distinguishes between big-endian operations with
1473 /// two different inputs (0), either-endian operations with two identical
1474 /// inputs (1), and little-endian operations with two different inputs (2).
1475 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1476 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1477                                SelectionDAG &DAG) {
1478   bool IsLE = DAG.getDataLayout().isLittleEndian();
1479   if (ShuffleKind == 0) {
1480     if (IsLE)
1481       return false;
1482     for (unsigned i = 0; i != 16; i += 2)
1483       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1484           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1485         return false;
1486   } else if (ShuffleKind == 2) {
1487     if (!IsLE)
1488       return false;
1489     for (unsigned i = 0; i != 16; i += 2)
1490       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1491           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1492         return false;
1493   } else if (ShuffleKind == 1) {
1494     unsigned j = IsLE ? 0 : 2;
1495     for (unsigned i = 0; i != 8; i += 2)
1496       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1497           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1498           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1499           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1500         return false;
1501   }
1502   return true;
1503 }
1504 
1505 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1506 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1507 /// current subtarget.
1508 ///
1509 /// The ShuffleKind distinguishes between big-endian operations with
1510 /// two different inputs (0), either-endian operations with two identical
1511 /// inputs (1), and little-endian operations with two different inputs (2).
1512 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1513 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1514                                SelectionDAG &DAG) {
1515   const PPCSubtarget& Subtarget =
1516     static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1517   if (!Subtarget.hasP8Vector())
1518     return false;
1519 
1520   bool IsLE = DAG.getDataLayout().isLittleEndian();
1521   if (ShuffleKind == 0) {
1522     if (IsLE)
1523       return false;
1524     for (unsigned i = 0; i != 16; i += 4)
1525       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1526           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1527           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1528           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1529         return false;
1530   } else if (ShuffleKind == 2) {
1531     if (!IsLE)
1532       return false;
1533     for (unsigned i = 0; i != 16; i += 4)
1534       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1535           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1536           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1537           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1538         return false;
1539   } else if (ShuffleKind == 1) {
1540     unsigned j = IsLE ? 0 : 4;
1541     for (unsigned i = 0; i != 8; i += 4)
1542       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1543           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1544           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1545           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1546           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1547           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1548           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1549           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1550         return false;
1551   }
1552   return true;
1553 }
1554 
1555 /// isVMerge - Common function, used to match vmrg* shuffles.
1556 ///
1557 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1558                      unsigned LHSStart, unsigned RHSStart) {
1559   if (N->getValueType(0) != MVT::v16i8)
1560     return false;
1561   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1562          "Unsupported merge size!");
1563 
1564   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1565     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1566       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1567                              LHSStart+j+i*UnitSize) ||
1568           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1569                              RHSStart+j+i*UnitSize))
1570         return false;
1571     }
1572   return true;
1573 }
1574 
1575 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1576 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1577 /// The ShuffleKind distinguishes between big-endian merges with two
1578 /// different inputs (0), either-endian merges with two identical inputs (1),
1579 /// and little-endian merges with two different inputs (2).  For the latter,
1580 /// the input operands are swapped (see PPCInstrAltivec.td).
1581 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1582                              unsigned ShuffleKind, SelectionDAG &DAG) {
1583   if (DAG.getDataLayout().isLittleEndian()) {
1584     if (ShuffleKind == 1) // unary
1585       return isVMerge(N, UnitSize, 0, 0);
1586     else if (ShuffleKind == 2) // swapped
1587       return isVMerge(N, UnitSize, 0, 16);
1588     else
1589       return false;
1590   } else {
1591     if (ShuffleKind == 1) // unary
1592       return isVMerge(N, UnitSize, 8, 8);
1593     else if (ShuffleKind == 0) // normal
1594       return isVMerge(N, UnitSize, 8, 24);
1595     else
1596       return false;
1597   }
1598 }
1599 
1600 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1601 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1602 /// The ShuffleKind distinguishes between big-endian merges with two
1603 /// different inputs (0), either-endian merges with two identical inputs (1),
1604 /// and little-endian merges with two different inputs (2).  For the latter,
1605 /// the input operands are swapped (see PPCInstrAltivec.td).
1606 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1607                              unsigned ShuffleKind, SelectionDAG &DAG) {
1608   if (DAG.getDataLayout().isLittleEndian()) {
1609     if (ShuffleKind == 1) // unary
1610       return isVMerge(N, UnitSize, 8, 8);
1611     else if (ShuffleKind == 2) // swapped
1612       return isVMerge(N, UnitSize, 8, 24);
1613     else
1614       return false;
1615   } else {
1616     if (ShuffleKind == 1) // unary
1617       return isVMerge(N, UnitSize, 0, 0);
1618     else if (ShuffleKind == 0) // normal
1619       return isVMerge(N, UnitSize, 0, 16);
1620     else
1621       return false;
1622   }
1623 }
1624 
1625 /**
1626  * Common function used to match vmrgew and vmrgow shuffles
1627  *
 * The IndexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
1631  *   - Little Endian:
1632  *     - Use offset of 0 to check for odd elements
1633  *     - Use offset of 4 to check for even elements
1634  *   - Big Endian:
1635  *     - Use offset of 0 to check for even elements
1636  *     - Use offset of 4 to check for odd elements
1637  * A detailed description of the vector element ordering for little endian and
1638  * big endian can be found at
1639  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1640  * Targeting your applications - what little endian and big endian IBM XL C/C++
1641  * compiler differences mean to you
1642  *
1643  * The mask to the shuffle vector instruction specifies the indices of the
1644  * elements from the two input vectors to place in the result. The elements are
1645  * numbered in array-access order, starting with the first vector. These vectors
1646  * are always of type v16i8, thus each vector will contain 16 elements of size
1647  * 8. More info on the shuffle vector can be found in the
1648  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1649  * Language Reference.
1650  *
1651  * The RHSStartValue indicates whether the same input vectors are used (unary)
1652  * or two different input vectors are used, based on the following:
1653  *   - If the instruction uses the same vector for both inputs, the range of the
1654  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1655  *     be 0.
1656  *   - If the instruction has two different vectors then the range of the
1657  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1658  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1659  *     to 31 specify elements in the second vector).
1660  *
1661  * \param[in] N The shuffle vector SD Node to analyze
1662  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1663  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1664  * vector to the shuffle_vector instruction
1665  * \return true iff this shuffle vector represents an even or odd word merge
1666  */
1667 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1668                      unsigned RHSStartValue) {
1669   if (N->getValueType(0) != MVT::v16i8)
1670     return false;
1671 
1672   for (unsigned i = 0; i < 2; ++i)
1673     for (unsigned j = 0; j < 4; ++j)
1674       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1675                              i*RHSStartValue+j+IndexOffset) ||
1676           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1677                              i*RHSStartValue+j+IndexOffset+8))
1678         return false;
1679   return true;
1680 }
1681 
1682 /**
1683  * Determine if the specified shuffle mask is suitable for the vmrgew or
1684  * vmrgow instructions.
1685  *
1686  * \param[in] N The shuffle vector SD Node to analyze
1687  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1688  * \param[in] ShuffleKind Identify the type of merge:
1689  *   - 0 = big-endian merge with two different inputs;
1690  *   - 1 = either-endian merge with two identical inputs;
1691  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1692  *     little-endian merges).
1693  * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask matches the requested even or odd word
 * merge for the given ShuffleKind
1695  */
1696 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1697                               unsigned ShuffleKind, SelectionDAG &DAG) {
1698   if (DAG.getDataLayout().isLittleEndian()) {
1699     unsigned indexOffset = CheckEven ? 4 : 0;
1700     if (ShuffleKind == 1) // Unary
1701       return isVMerge(N, indexOffset, 0);
1702     else if (ShuffleKind == 2) // swapped
1703       return isVMerge(N, indexOffset, 16);
1704     else
1705       return false;
1706   }
1707   else {
1708     unsigned indexOffset = CheckEven ? 0 : 4;
1709     if (ShuffleKind == 1) // Unary
1710       return isVMerge(N, indexOffset, 0);
1711     else if (ShuffleKind == 0) // Normal
1712       return isVMerge(N, indexOffset, 16);
1713     else
1714       return false;
1715   }
1716   return false;
1717 }
1718 
1719 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1720 /// amount, otherwise return -1.
1721 /// The ShuffleKind distinguishes between big-endian operations with two
1722 /// different inputs (0), either-endian operations with two identical inputs
1723 /// (1), and little-endian operations with two different inputs (2).  For the
1724 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1725 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1726                              SelectionDAG &DAG) {
1727   if (N->getValueType(0) != MVT::v16i8)
1728     return -1;
1729 
1730   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1731 
1732   // Find the first non-undef value in the shuffle mask.
1733   unsigned i;
1734   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1735     /*search*/;
1736 
1737   if (i == 16) return -1;  // all undef.
1738 
1739   // Otherwise, check to see if the rest of the elements are consecutively
1740   // numbered from this value.
1741   unsigned ShiftAmt = SVOp->getMaskElt(i);
1742   if (ShiftAmt < i) return -1;
1743 
1744   ShiftAmt -= i;
1745   bool isLE = DAG.getDataLayout().isLittleEndian();
1746 
1747   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1748     // Check the rest of the elements to see if they are consecutive.
1749     for (++i; i != 16; ++i)
1750       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1751         return -1;
1752   } else if (ShuffleKind == 1) {
1753     // Check the rest of the elements to see if they are consecutive.
1754     for (++i; i != 16; ++i)
1755       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1756         return -1;
1757   } else
1758     return -1;
1759 
1760   if (isLE)
1761     ShiftAmt = 16 - ShiftAmt;
1762 
1763   return ShiftAmt;
1764 }
1765 
1766 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1767 /// specifies a splat of a single element that is suitable for input to
1768 /// VSPLTB/VSPLTH/VSPLTW.
1769 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1770   assert(N->getValueType(0) == MVT::v16i8 &&
1771          (EltSize == 1 || EltSize == 2 || EltSize == 4));
1772 
1773   // The consecutive indices need to specify an element, not part of two
1774   // different elements.  So abandon ship early if this isn't the case.
1775   if (N->getMaskElt(0) % EltSize != 0)
1776     return false;
1777 
1778   // This is a splat operation if each element of the permute is the same, and
1779   // if the value doesn't reference the second vector.
1780   unsigned ElementBase = N->getMaskElt(0);
1781 
1782   // FIXME: Handle UNDEF elements too!
1783   if (ElementBase >= 16)
1784     return false;
1785 
1786   // Check that the indices are consecutive, in the case of a multi-byte element
1787   // splatted with a v16i8 mask.
1788   for (unsigned i = 1; i != EltSize; ++i)
1789     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1790       return false;
1791 
1792   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1793     if (N->getMaskElt(i) < 0) continue;
1794     for (unsigned j = 0; j != EltSize; ++j)
1795       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1796         return false;
1797   }
1798   return true;
1799 }
1800 
1801 /// Check that the mask is shuffling N byte elements. Within each N byte
1802 /// element of the mask, the indices could be either in increasing or
1803 /// decreasing order as long as they are consecutive.
1804 /// \param[in] N the shuffle vector SD Node to analyze
1805 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1806 /// Word/DoubleWord/QuadWord).
1807 /// \param[in] StepLen the delta indices number among the N byte element, if
1808 /// the mask is in increasing/decreasing order then it is 1/-1.
1809 /// \return true iff the mask is shuffling N byte elements.
1810 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1811                                    int StepLen) {
1812   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1813          "Unexpected element width.");
1814   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1815 
1816   unsigned NumOfElem = 16 / Width;
1817   unsigned MaskVal[16]; //  Width is never greater than 16
1818   for (unsigned i = 0; i < NumOfElem; ++i) {
1819     MaskVal[0] = N->getMaskElt(i * Width);
1820     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1821       return false;
1822     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1823       return false;
1824     }
1825 
1826     for (unsigned int j = 1; j < Width; ++j) {
1827       MaskVal[j] = N->getMaskElt(i * Width + j);
1828       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1829         return false;
1830       }
1831     }
1832   }
1833 
1834   return true;
1835 }
1836 
1837 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1838                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1839   if (!isNByteElemShuffleMask(N, 4, 1))
1840     return false;
1841 
1842   // Now we look at mask elements 0,4,8,12
1843   unsigned M0 = N->getMaskElt(0) / 4;
1844   unsigned M1 = N->getMaskElt(4) / 4;
1845   unsigned M2 = N->getMaskElt(8) / 4;
1846   unsigned M3 = N->getMaskElt(12) / 4;
1847   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1848   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1849 
1850   // Below, let H and L be arbitrary elements of the shuffle mask
1851   // where H is in the range [4,7] and L is in the range [0,3].
1852   // H, 1, 2, 3 or L, 5, 6, 7
1853   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1854       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1855     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1856     InsertAtByte = IsLE ? 12 : 0;
1857     Swap = M0 < 4;
1858     return true;
1859   }
1860   // 0, H, 2, 3 or 4, L, 6, 7
1861   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1862       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1863     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1864     InsertAtByte = IsLE ? 8 : 4;
1865     Swap = M1 < 4;
1866     return true;
1867   }
1868   // 0, 1, H, 3 or 4, 5, L, 7
1869   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1870       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1871     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1872     InsertAtByte = IsLE ? 4 : 8;
1873     Swap = M2 < 4;
1874     return true;
1875   }
1876   // 0, 1, 2, H or 4, 5, 6, L
1877   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1878       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1879     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1880     InsertAtByte = IsLE ? 0 : 12;
1881     Swap = M3 < 4;
1882     return true;
1883   }
1884 
1885   // If both vector operands for the shuffle are the same vector, the mask will
1886   // contain only elements from the first one and the second one will be undef.
1887   if (N->getOperand(1).isUndef()) {
1888     ShiftElts = 0;
1889     Swap = true;
1890     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1891     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1892       InsertAtByte = IsLE ? 12 : 0;
1893       return true;
1894     }
1895     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1896       InsertAtByte = IsLE ? 8 : 4;
1897       return true;
1898     }
1899     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1900       InsertAtByte = IsLE ? 4 : 8;
1901       return true;
1902     }
1903     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1904       InsertAtByte = IsLE ? 0 : 12;
1905       return true;
1906     }
1907   }
1908 
1909   return false;
1910 }
1911 
1912 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1913                                bool &Swap, bool IsLE) {
1914   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1915   // Ensure each byte index of the word is consecutive.
1916   if (!isNByteElemShuffleMask(N, 4, 1))
1917     return false;
1918 
1919   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1920   unsigned M0 = N->getMaskElt(0) / 4;
1921   unsigned M1 = N->getMaskElt(4) / 4;
1922   unsigned M2 = N->getMaskElt(8) / 4;
1923   unsigned M3 = N->getMaskElt(12) / 4;
1924 
1925   // If both vector operands for the shuffle are the same vector, the mask will
1926   // contain only elements from the first one and the second one will be undef.
1927   if (N->getOperand(1).isUndef()) {
1928     assert(M0 < 4 && "Indexing into an undef vector?");
1929     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1930       return false;
1931 
1932     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1933     Swap = false;
1934     return true;
1935   }
1936 
1937   // Ensure each word index of the ShuffleVector Mask is consecutive.
1938   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
1939     return false;
1940 
1941   if (IsLE) {
1942     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
1943       // Input vectors don't need to be swapped if the leading element
1944       // of the result is one of the 3 left elements of the second vector
1945       // (or if there is no shift to be done at all).
1946       Swap = false;
1947       ShiftElts = (8 - M0) % 8;
1948     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
1949       // Input vectors need to be swapped if the leading element
1950       // of the result is one of the 3 left elements of the first vector
1951       // (or if we're shifting by 4 - thereby simply swapping the vectors).
1952       Swap = true;
1953       ShiftElts = (4 - M0) % 4;
1954     }
1955 
1956     return true;
1957   } else {                                          // BE
1958     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
1959       // Input vectors don't need to be swapped if the leading element
1960       // of the result is one of the 4 elements of the first vector.
1961       Swap = false;
1962       ShiftElts = M0;
1963     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
1964       // Input vectors need to be swapped if the leading element
1965       // of the result is one of the 4 elements of the right vector.
1966       Swap = true;
1967       ShiftElts = M0 - 4;
1968     }
1969 
1970     return true;
1971   }
1972 }
1973 
1974 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1975   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1976 
1977   if (!isNByteElemShuffleMask(N, Width, -1))
1978     return false;
1979 
1980   for (int i = 0; i < 16; i += Width)
1981     if (N->getMaskElt(i) != i + Width - 1)
1982       return false;
1983 
1984   return true;
1985 }
1986 
/// Return true if the mask of \p N reverses the byte order inside every
/// 2-byte (halfword) element while leaving the elements in place.
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}
1990 
/// Return true if the mask of \p N reverses the byte order inside every
/// 4-byte (word) element while leaving the elements in place.
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}
1994 
/// Return true if the mask of \p N reverses the byte order inside every
/// 8-byte (doubleword) element while leaving the elements in place.
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}
1998 
/// Return true if the mask of \p N reverses the byte order of the whole
/// 16-byte (quadword) vector.
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}
2002 
2003 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2004 /// if the inputs to the instruction should be swapped and set \p DM to the
2005 /// value for the immediate.
2006 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2007 /// AND element 0 of the result comes from the first input (LE) or second input
2008 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2009 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2010 /// mask.
2011 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2012                                bool &Swap, bool IsLE) {
2013   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2014 
2015   // Ensure each byte index of the double word is consecutive.
2016   if (!isNByteElemShuffleMask(N, 8, 1))
2017     return false;
2018 
2019   unsigned M0 = N->getMaskElt(0) / 8;
2020   unsigned M1 = N->getMaskElt(8) / 8;
2021   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2022 
2023   // If both vector operands for the shuffle are the same vector, the mask will
2024   // contain only elements from the first one and the second one will be undef.
2025   if (N->getOperand(1).isUndef()) {
2026     if ((M0 | M1) < 2) {
2027       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2028       Swap = false;
2029       return true;
2030     } else
2031       return false;
2032   }
2033 
2034   if (IsLE) {
2035     if (M0 > 1 && M1 < 2) {
2036       Swap = false;
2037     } else if (M0 < 2 && M1 > 1) {
2038       M0 = (M0 + 2) % 4;
2039       M1 = (M1 + 2) % 4;
2040       Swap = true;
2041     } else
2042       return false;
2043 
2044     // Note: if control flow comes here that means Swap is already set above
2045     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2046     return true;
2047   } else { // BE
2048     if (M0 < 2 && M1 > 1) {
2049       Swap = false;
2050     } else if (M0 > 1 && M1 < 2) {
2051       M0 = (M0 + 2) % 4;
2052       M1 = (M1 + 2) % 4;
2053       Swap = true;
2054     } else
2055       return false;
2056 
2057     // Note: if control flow comes here that means Swap is already set above
2058     DM = (M0 << 1) + (M1 & 1);
2059     return true;
2060   }
2061 }
2062 
2063 
2064 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
2065 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
2066 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
2067                                 SelectionDAG &DAG) {
2068   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2069   assert(isSplatShuffleMask(SVOp, EltSize));
2070   if (DAG.getDataLayout().isLittleEndian())
2071     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2072   else
2073     return SVOp->getMaskElt(0) / EltSize;
2074 }
2075 
2076 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2077 /// by using a vspltis[bhw] instruction of the specified element size, return
2078 /// the constant being splatted.  The ByteSize field indicates the number of
2079 /// bytes of each element [124] -> [bhw].
2080 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2081   SDValue OpVal(nullptr, 0);
2082 
2083   // If ByteSize of the splat is bigger than the element size of the
2084   // build_vector, then we have a case where we are checking for a splat where
2085   // multiple elements of the buildvector are folded together into a single
2086   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2087   unsigned EltSize = 16/N->getNumOperands();
2088   if (EltSize < ByteSize) {
2089     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2090     SDValue UniquedVals[4];
2091     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2092 
2093     // See if all of the elements in the buildvector agree across.
2094     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2095       if (N->getOperand(i).isUndef()) continue;
2096       // If the element isn't a constant, bail fully out.
2097       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2098 
2099       if (!UniquedVals[i&(Multiple-1)].getNode())
2100         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2101       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2102         return SDValue();  // no match.
2103     }
2104 
2105     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2106     // either constant or undef values that are identical for each chunk.  See
2107     // if these chunks can form into a larger vspltis*.
2108 
2109     // Check to see if all of the leading entries are either 0 or -1.  If
2110     // neither, then this won't fit into the immediate field.
2111     bool LeadingZero = true;
2112     bool LeadingOnes = true;
2113     for (unsigned i = 0; i != Multiple-1; ++i) {
2114       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2115 
2116       LeadingZero &= isNullConstant(UniquedVals[i]);
2117       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2118     }
2119     // Finally, check the least significant entry.
2120     if (LeadingZero) {
2121       if (!UniquedVals[Multiple-1].getNode())
2122         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2123       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2124       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2125         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2126     }
2127     if (LeadingOnes) {
2128       if (!UniquedVals[Multiple-1].getNode())
2129         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2130       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2131       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2132         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2133     }
2134 
2135     return SDValue();
2136   }
2137 
2138   // Check to see if this buildvec has a single non-undef value in its elements.
2139   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2140     if (N->getOperand(i).isUndef()) continue;
2141     if (!OpVal.getNode())
2142       OpVal = N->getOperand(i);
2143     else if (OpVal != N->getOperand(i))
2144       return SDValue();
2145   }
2146 
2147   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2148 
2149   unsigned ValSizeInBytes = EltSize;
2150   uint64_t Value = 0;
2151   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2152     Value = CN->getZExtValue();
2153   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2154     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2155     Value = FloatToBits(CN->getValueAPF().convertToFloat());
2156   }
2157 
2158   // If the splat value is larger than the element value, then we can never do
2159   // this splat.  The only case that we could fit the replicated bits into our
2160   // immediate field for would be zero, and we prefer to use vxor for it.
2161   if (ValSizeInBytes < ByteSize) return SDValue();
2162 
2163   // If the element value is larger than the splat value, check if it consists
2164   // of a repeated bit pattern of size ByteSize.
2165   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2166     return SDValue();
2167 
2168   // Properly sign extend the value.
2169   int MaskVal = SignExtend32(Value, ByteSize * 8);
2170 
2171   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2172   if (MaskVal == 0) return SDValue();
2173 
2174   // Finally, if this value fits in a 5 bit sext field, return it
2175   if (SignExtend32<5>(MaskVal) == MaskVal)
2176     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2177   return SDValue();
2178 }
2179 
2180 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2181 /// amount, otherwise return -1.
2182 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2183   EVT VT = N->getValueType(0);
2184   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2185     return -1;
2186 
2187   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2188 
2189   // Find the first non-undef value in the shuffle mask.
2190   unsigned i;
2191   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2192     /*search*/;
2193 
2194   if (i == 4) return -1;  // all undef.
2195 
2196   // Otherwise, check to see if the rest of the elements are consecutively
2197   // numbered from this value.
2198   unsigned ShiftAmt = SVOp->getMaskElt(i);
2199   if (ShiftAmt < i) return -1;
2200   ShiftAmt -= i;
2201 
2202   // Check the rest of the elements to see if they are consecutive.
2203   for (++i; i != 4; ++i)
2204     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2205       return -1;
2206 
2207   return ShiftAmt;
2208 }
2209 
2210 //===----------------------------------------------------------------------===//
2211 //  Addressing Mode Selection
2212 //===----------------------------------------------------------------------===//
2213 
2214 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2215 /// or 64-bit immediate, and if the value can be accurately represented as a
2216 /// sign extension from a 16-bit value.  If so, this returns true and the
2217 /// immediate.
2218 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2219   if (!isa<ConstantSDNode>(N))
2220     return false;
2221 
2222   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2223   if (N->getValueType(0) == MVT::i32)
2224     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2225   else
2226     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2227 }
2228 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2229   return isIntS16Immediate(Op.getNode(), Imm);
2230 }
2231 
2232 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2233 /// can be represented as an indexed [r+r] operation.  Returns false if it
2234 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2235 /// non-zero and N can be represented by a base register plus a signed 16-bit
2236 /// displacement, make a more precise judgement by checking (displacement % \p
2237 /// EncodingAlignment).
2238 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2239                                             SDValue &Index, SelectionDAG &DAG,
2240                                             unsigned EncodingAlignment) const {
2241   int16_t imm = 0;
2242   if (N.getOpcode() == ISD::ADD) {
2243     if (isIntS16Immediate(N.getOperand(1), imm) &&
2244         (!EncodingAlignment || !(imm % EncodingAlignment)))
2245       return false; // r+i
2246     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2247       return false;    // r+i
2248 
2249     Base = N.getOperand(0);
2250     Index = N.getOperand(1);
2251     return true;
2252   } else if (N.getOpcode() == ISD::OR) {
2253     if (isIntS16Immediate(N.getOperand(1), imm) &&
2254         (!EncodingAlignment || !(imm % EncodingAlignment)))
2255       return false; // r+i can fold it if we can.
2256 
2257     // If this is an or of disjoint bitfields, we can codegen this as an add
2258     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2259     // disjoint.
2260     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2261 
2262     if (LHSKnown.Zero.getBoolValue()) {
2263       KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2264       // If all of the bits are known zero on the LHS or RHS, the add won't
2265       // carry.
2266       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2267         Base = N.getOperand(0);
2268         Index = N.getOperand(1);
2269         return true;
2270       }
2271     }
2272   }
2273 
2274   return false;
2275 }
2276 
2277 // If we happen to be doing an i64 load or store into a stack slot that has
2278 // less than a 4-byte alignment, then the frame-index elimination may need to
2279 // use an indexed load or store instruction (because the offset may not be a
2280 // multiple of 4). The extra register needed to hold the offset comes from the
2281 // register scavenger, and it is possible that the scavenger will need to use
2282 // an emergency spill slot. As a result, we need to make sure that a spill slot
2283 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2284 // stack slot.
2285 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2286   // FIXME: This does not handle the LWA case.
2287   if (VT != MVT::i64)
2288     return;
2289 
2290   // NOTE: We'll exclude negative FIs here, which come from argument
2291   // lowering, because there are no known test cases triggering this problem
2292   // using packed structures (or similar). We can remove this exclusion if
2293   // we find such a test case. The reason why this is so test-case driven is
2294   // because this entire 'fixup' is only to prevent crashes (from the
2295   // register scavenger) on not-really-valid inputs. For example, if we have:
2296   //   %a = alloca i1
2297   //   %b = bitcast i1* %a to i64*
2298   //   store i64* a, i64 b
2299   // then the store should really be marked as 'align 1', but is not. If it
2300   // were marked as 'align 1' then the indexed form would have been
2301   // instruction-selected initially, and the problem this 'fixup' is preventing
2302   // won't happen regardless.
2303   if (FrameIdx < 0)
2304     return;
2305 
2306   MachineFunction &MF = DAG.getMachineFunction();
2307   MachineFrameInfo &MFI = MF.getFrameInfo();
2308 
2309   unsigned Align = MFI.getObjectAlignment(FrameIdx);
2310   if (Align >= 4)
2311     return;
2312 
2313   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2314   FuncInfo->setHasNonRISpills();
2315 }
2316 
2317 /// Returns true if the address N can be represented by a base register plus
2318 /// a signed 16-bit displacement [r+imm], and if it is not better
2319 /// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2320 /// displacements that are multiples of that value.
2321 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2322                                             SDValue &Base,
2323                                             SelectionDAG &DAG,
2324                                             unsigned EncodingAlignment) const {
2325   // FIXME dl should come from parent load or store, not from address
2326   SDLoc dl(N);
2327   // If this can be more profitably realized as r+r, fail.
2328   if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2329     return false;
2330 
2331   if (N.getOpcode() == ISD::ADD) {
2332     int16_t imm = 0;
2333     if (isIntS16Immediate(N.getOperand(1), imm) &&
2334         (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2335       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2336       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2337         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2338         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2339       } else {
2340         Base = N.getOperand(0);
2341       }
2342       return true; // [r+i]
2343     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2344       // Match LOAD (ADD (X, Lo(G))).
2345       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2346              && "Cannot handle constant offsets yet!");
2347       Disp = N.getOperand(1).getOperand(0);  // The global address.
2348       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2349              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2350              Disp.getOpcode() == ISD::TargetConstantPool ||
2351              Disp.getOpcode() == ISD::TargetJumpTable);
2352       Base = N.getOperand(0);
2353       return true;  // [&g+r]
2354     }
2355   } else if (N.getOpcode() == ISD::OR) {
2356     int16_t imm = 0;
2357     if (isIntS16Immediate(N.getOperand(1), imm) &&
2358         (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2359       // If this is an or of disjoint bitfields, we can codegen this as an add
2360       // (for better address arithmetic) if the LHS and RHS of the OR are
2361       // provably disjoint.
2362       KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2363 
2364       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2365         // If all of the bits are known zero on the LHS or RHS, the add won't
2366         // carry.
2367         if (FrameIndexSDNode *FI =
2368               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2369           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2370           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2371         } else {
2372           Base = N.getOperand(0);
2373         }
2374         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2375         return true;
2376       }
2377     }
2378   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2379     // Loading from a constant address.
2380 
2381     // If this address fits entirely in a 16-bit sext immediate field, codegen
2382     // this as "d, 0"
2383     int16_t Imm;
2384     if (isIntS16Immediate(CN, Imm) &&
2385         (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
2386       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2387       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2388                              CN->getValueType(0));
2389       return true;
2390     }
2391 
2392     // Handle 32-bit sext immediates with LIS + addr mode.
2393     if ((CN->getValueType(0) == MVT::i32 ||
2394          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2395         (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
2396       int Addr = (int)CN->getZExtValue();
2397 
2398       // Otherwise, break this down into an LIS + disp.
2399       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2400 
2401       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2402                                    MVT::i32);
2403       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2404       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2405       return true;
2406     }
2407   }
2408 
2409   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2410   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2411     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2412     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2413   } else
2414     Base = N;
2415   return true;      // [r+0]
2416 }
2417 
2418 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2419 /// represented as an indexed [r+r] operation.
2420 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2421                                                 SDValue &Index,
2422                                                 SelectionDAG &DAG) const {
2423   // Check to see if we can easily represent this as an [r+r] address.  This
2424   // will fail if it thinks that the address is more profitably represented as
2425   // reg+imm, e.g. where imm = 0.
2426   if (SelectAddressRegReg(N, Base, Index, DAG))
2427     return true;
2428 
2429   // If the address is the result of an add, we will utilize the fact that the
2430   // address calculation includes an implicit add.  However, we can reduce
2431   // register pressure if we do not materialize a constant just for use as the
2432   // index register.  We only get rid of the add if it is not an add of a
2433   // value and a 16-bit signed constant and both have a single use.
2434   int16_t imm = 0;
2435   if (N.getOpcode() == ISD::ADD &&
2436       (!isIntS16Immediate(N.getOperand(1), imm) ||
2437        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2438     Base = N.getOperand(0);
2439     Index = N.getOperand(1);
2440     return true;
2441   }
2442 
2443   // Otherwise, do it the hard way, using R0 as the base register.
2444   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2445                          N.getValueType());
2446   Index = N;
2447   return true;
2448 }
2449 
2450 /// Returns true if we should use a direct load into vector instruction
2451 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2452 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2453 
2454   // If there are any other uses other than scalar to vector, then we should
2455   // keep it as a scalar load -> direct move pattern to prevent multiple
2456   // loads.
2457   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2458   if (!LD)
2459     return false;
2460 
2461   EVT MemVT = LD->getMemoryVT();
2462   if (!MemVT.isSimple())
2463     return false;
2464   switch(MemVT.getSimpleVT().SimpleTy) {
2465   case MVT::i64:
2466     break;
2467   case MVT::i32:
2468     if (!ST.hasP8Vector())
2469       return false;
2470     break;
2471   case MVT::i16:
2472   case MVT::i8:
2473     if (!ST.hasP9Vector())
2474       return false;
2475     break;
2476   default:
2477     return false;
2478   }
2479 
2480   SDValue LoadedVal(N, 0);
2481   if (!LoadedVal.hasOneUse())
2482     return false;
2483 
2484   for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2485        UI != UE; ++UI)
2486     if (UI.getUse().get().getResNo() == 0 &&
2487         UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
2488       return false;
2489 
2490   return true;
2491 }
2492 
2493 /// getPreIndexedAddressParts - returns true by value, base pointer and
2494 /// offset pointer and addressing mode by reference if the node's address
2495 /// can be legally represented as pre-indexed load / store address.
2496 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2497                                                   SDValue &Offset,
2498                                                   ISD::MemIndexedMode &AM,
2499                                                   SelectionDAG &DAG) const {
2500   if (DisablePPCPreinc) return false;
2501 
2502   bool isLoad = true;
2503   SDValue Ptr;
2504   EVT VT;
2505   unsigned Alignment;
2506   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2507     Ptr = LD->getBasePtr();
2508     VT = LD->getMemoryVT();
2509     Alignment = LD->getAlignment();
2510   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2511     Ptr = ST->getBasePtr();
2512     VT  = ST->getMemoryVT();
2513     Alignment = ST->getAlignment();
2514     isLoad = false;
2515   } else
2516     return false;
2517 
2518   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2519   // instructions because we can fold these into a more efficient instruction
2520   // instead, (such as LXSD).
2521   if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2522     return false;
2523   }
2524 
2525   // PowerPC doesn't have preinc load/store instructions for vectors (except
2526   // for QPX, which does have preinc r+r forms).
2527   if (VT.isVector()) {
2528     if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2529       return false;
2530     } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2531       AM = ISD::PRE_INC;
2532       return true;
2533     }
2534   }
2535 
2536   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2537     // Common code will reject creating a pre-inc form if the base pointer
2538     // is a frame index, or if N is a store and the base pointer is either
2539     // the same as or a predecessor of the value being stored.  Check for
2540     // those situations here, and try with swapped Base/Offset instead.
2541     bool Swap = false;
2542 
2543     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2544       Swap = true;
2545     else if (!isLoad) {
2546       SDValue Val = cast<StoreSDNode>(N)->getValue();
2547       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2548         Swap = true;
2549     }
2550 
2551     if (Swap)
2552       std::swap(Base, Offset);
2553 
2554     AM = ISD::PRE_INC;
2555     return true;
2556   }
2557 
2558   // LDU/STU can only handle immediates that are a multiple of 4.
2559   if (VT != MVT::i64) {
2560     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2561       return false;
2562   } else {
2563     // LDU/STU need an address with at least 4-byte alignment.
2564     if (Alignment < 4)
2565       return false;
2566 
2567     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2568       return false;
2569   }
2570 
2571   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2572     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2573     // sext i32 to i64 when addr mode is r+i.
2574     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2575         LD->getExtensionType() == ISD::SEXTLOAD &&
2576         isa<ConstantSDNode>(Offset))
2577       return false;
2578   }
2579 
2580   AM = ISD::PRE_INC;
2581   return true;
2582 }
2583 
2584 //===----------------------------------------------------------------------===//
2585 //  LowerOperation implementation
2586 //===----------------------------------------------------------------------===//
2587 
2588 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2589 /// and LoOpFlags to the target MO flags.
2590 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2591                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2592                                const GlobalValue *GV = nullptr) {
2593   HiOpFlags = PPCII::MO_HA;
2594   LoOpFlags = PPCII::MO_LO;
2595 
2596   // Don't use the pic base if not in PIC relocation model.
2597   if (IsPIC) {
2598     HiOpFlags |= PPCII::MO_PIC_FLAG;
2599     LoOpFlags |= PPCII::MO_PIC_FLAG;
2600   }
2601 
2602   // If this is a reference to a global value that requires a non-lazy-ptr, make
2603   // sure that instruction lowering adds it.
2604   if (GV && Subtarget.hasLazyResolverStub(GV)) {
2605     HiOpFlags |= PPCII::MO_NLP_FLAG;
2606     LoOpFlags |= PPCII::MO_NLP_FLAG;
2607 
2608     if (GV->hasHiddenVisibility()) {
2609       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2610       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2611     }
2612   }
2613 }
2614 
2615 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2616                              SelectionDAG &DAG) {
2617   SDLoc DL(HiPart);
2618   EVT PtrVT = HiPart.getValueType();
2619   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2620 
2621   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2622   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2623 
2624   // With PIC, the first instruction is actually "GR+hi(&G)".
2625   if (isPIC)
2626     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2627                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2628 
2629   // Generate non-pic code that has direct accesses to the constant pool.
2630   // The address of the global is just (hi(&g)+lo(&g)).
2631   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2632 }
2633 
2634 static void setUsesTOCBasePtr(MachineFunction &MF) {
2635   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2636   FuncInfo->setUsesTOCBasePtr();
2637 }
2638 
2639 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2640   setUsesTOCBasePtr(DAG.getMachineFunction());
2641 }
2642 
2643 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2644                            SDValue GA) {
2645   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2646   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2647                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2648 
2649   SDValue Ops[] = { GA, Reg };
2650   return DAG.getMemIntrinsicNode(
2651       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2652       MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
2653       MachineMemOperand::MOLoad);
2654 }
2655 
2656 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2657                                              SelectionDAG &DAG) const {
2658   EVT PtrVT = Op.getValueType();
2659   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2660   const Constant *C = CP->getConstVal();
2661 
2662   // 64-bit SVR4 ABI code is always position-independent.
2663   // The actual address of the GlobalValue is stored in the TOC.
2664   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2665     setUsesTOCBasePtr(DAG);
2666     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2667     return getTOCEntry(DAG, SDLoc(CP), true, GA);
2668   }
2669 
2670   unsigned MOHiFlag, MOLoFlag;
2671   bool IsPIC = isPositionIndependent();
2672   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2673 
2674   if (IsPIC && Subtarget.isSVR4ABI()) {
2675     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2676                                            PPCII::MO_PIC_FLAG);
2677     return getTOCEntry(DAG, SDLoc(CP), false, GA);
2678   }
2679 
2680   SDValue CPIHi =
2681     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2682   SDValue CPILo =
2683     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2684   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2685 }
2686 
2687 // For 64-bit PowerPC, prefer the more compact relative encodings.
2688 // This trades 32 bits per jump table entry for one or two instructions
2689 // on the jump site.
2690 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2691   if (isJumpTableRelative())
2692     return MachineJumpTableInfo::EK_LabelDifference32;
2693 
2694   return TargetLowering::getJumpTableEncoding();
2695 }
2696 
2697 bool PPCTargetLowering::isJumpTableRelative() const {
2698   if (Subtarget.isPPC64())
2699     return true;
2700   return TargetLowering::isJumpTableRelative();
2701 }
2702 
2703 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2704                                                     SelectionDAG &DAG) const {
2705   if (!Subtarget.isPPC64())
2706     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2707 
2708   switch (getTargetMachine().getCodeModel()) {
2709   case CodeModel::Small:
2710   case CodeModel::Medium:
2711     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2712   default:
2713     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2714                        getPointerTy(DAG.getDataLayout()));
2715   }
2716 }
2717 
2718 const MCExpr *
2719 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2720                                                 unsigned JTI,
2721                                                 MCContext &Ctx) const {
2722   if (!Subtarget.isPPC64())
2723     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2724 
2725   switch (getTargetMachine().getCodeModel()) {
2726   case CodeModel::Small:
2727   case CodeModel::Medium:
2728     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2729   default:
2730     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2731   }
2732 }
2733 
2734 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2735   EVT PtrVT = Op.getValueType();
2736   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2737 
2738   // 64-bit SVR4 ABI code is always position-independent.
2739   // The actual address of the GlobalValue is stored in the TOC.
2740   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2741     setUsesTOCBasePtr(DAG);
2742     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2743     return getTOCEntry(DAG, SDLoc(JT), true, GA);
2744   }
2745 
2746   unsigned MOHiFlag, MOLoFlag;
2747   bool IsPIC = isPositionIndependent();
2748   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2749 
2750   if (IsPIC && Subtarget.isSVR4ABI()) {
2751     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2752                                         PPCII::MO_PIC_FLAG);
2753     return getTOCEntry(DAG, SDLoc(GA), false, GA);
2754   }
2755 
2756   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2757   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2758   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2759 }
2760 
2761 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2762                                              SelectionDAG &DAG) const {
2763   EVT PtrVT = Op.getValueType();
2764   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2765   const BlockAddress *BA = BASDN->getBlockAddress();
2766 
2767   // 64-bit SVR4 ABI code is always position-independent.
2768   // The actual BlockAddress is stored in the TOC.
2769   if (Subtarget.isSVR4ABI() &&
2770       (Subtarget.isPPC64() || isPositionIndependent())) {
2771     if (Subtarget.isPPC64())
2772       setUsesTOCBasePtr(DAG);
2773     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2774     return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
2775   }
2776 
2777   unsigned MOHiFlag, MOLoFlag;
2778   bool IsPIC = isPositionIndependent();
2779   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2780   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2781   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2782   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2783 }
2784 
2785 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2786                                               SelectionDAG &DAG) const {
2787   // FIXME: TLS addresses currently use medium model code sequences,
2788   // which is the most useful form.  Eventually support for small and
2789   // large models could be added if users need it, at the cost of
2790   // additional complexity.
2791   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2792   if (DAG.getTarget().useEmulatedTLS())
2793     return LowerToTLSEmulatedModel(GA, DAG);
2794 
2795   SDLoc dl(GA);
2796   const GlobalValue *GV = GA->getGlobal();
2797   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2798   bool is64bit = Subtarget.isPPC64();
2799   const Module *M = DAG.getMachineFunction().getFunction().getParent();
2800   PICLevel::Level picLevel = M->getPICLevel();
2801 
2802   const TargetMachine &TM = getTargetMachine();
2803   TLSModel::Model Model = TM.getTLSModel(GV);
2804 
2805   if (Model == TLSModel::LocalExec) {
2806     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2807                                                PPCII::MO_TPREL_HA);
2808     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2809                                                PPCII::MO_TPREL_LO);
2810     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2811                              : DAG.getRegister(PPC::R2, MVT::i32);
2812 
2813     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2814     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2815   }
2816 
2817   if (Model == TLSModel::InitialExec) {
2818     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2819     SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2820                                                 PPCII::MO_TLS);
2821     SDValue GOTPtr;
2822     if (is64bit) {
2823       setUsesTOCBasePtr(DAG);
2824       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2825       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2826                            PtrVT, GOTReg, TGA);
2827     } else {
2828       if (!TM.isPositionIndependent())
2829         GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2830       else if (picLevel == PICLevel::SmallPIC)
2831         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2832       else
2833         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2834     }
2835     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2836                                    PtrVT, TGA, GOTPtr);
2837     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2838   }
2839 
2840   if (Model == TLSModel::GeneralDynamic) {
2841     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2842     SDValue GOTPtr;
2843     if (is64bit) {
2844       setUsesTOCBasePtr(DAG);
2845       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2846       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2847                                    GOTReg, TGA);
2848     } else {
2849       if (picLevel == PICLevel::SmallPIC)
2850         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2851       else
2852         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2853     }
2854     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2855                        GOTPtr, TGA, TGA);
2856   }
2857 
2858   if (Model == TLSModel::LocalDynamic) {
2859     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2860     SDValue GOTPtr;
2861     if (is64bit) {
2862       setUsesTOCBasePtr(DAG);
2863       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2864       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2865                            GOTReg, TGA);
2866     } else {
2867       if (picLevel == PICLevel::SmallPIC)
2868         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2869       else
2870         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2871     }
2872     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2873                                   PtrVT, GOTPtr, TGA, TGA);
2874     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2875                                       PtrVT, TLSAddr, TGA);
2876     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2877   }
2878 
2879   llvm_unreachable("Unknown TLS model!");
2880 }
2881 
2882 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2883                                               SelectionDAG &DAG) const {
2884   EVT PtrVT = Op.getValueType();
2885   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2886   SDLoc DL(GSDN);
2887   const GlobalValue *GV = GSDN->getGlobal();
2888 
2889   // 64-bit SVR4 ABI code is always position-independent.
2890   // The actual address of the GlobalValue is stored in the TOC.
2891   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2892     setUsesTOCBasePtr(DAG);
2893     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2894     return getTOCEntry(DAG, DL, true, GA);
2895   }
2896 
2897   unsigned MOHiFlag, MOLoFlag;
2898   bool IsPIC = isPositionIndependent();
2899   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2900 
2901   if (IsPIC && Subtarget.isSVR4ABI()) {
2902     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2903                                             GSDN->getOffset(),
2904                                             PPCII::MO_PIC_FLAG);
2905     return getTOCEntry(DAG, DL, false, GA);
2906   }
2907 
2908   SDValue GAHi =
2909     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2910   SDValue GALo =
2911     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2912 
2913   SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2914 
2915   // If the global reference is actually to a non-lazy-pointer, we have to do an
2916   // extra load to get the address of the global.
2917   if (MOHiFlag & PPCII::MO_NLP_FLAG)
2918     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2919   return Ptr;
2920 }
2921 
2922 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2923   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2924   SDLoc dl(Op);
2925 
2926   if (Op.getValueType() == MVT::v2i64) {
2927     // When the operands themselves are v2i64 values, we need to do something
2928     // special because VSX has no underlying comparison operations for these.
2929     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2930       // Equality can be handled by casting to the legal type for Altivec
2931       // comparisons, everything else needs to be expanded.
2932       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2933         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2934                  DAG.getSetCC(dl, MVT::v4i32,
2935                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2936                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2937                    CC));
2938       }
2939 
2940       return SDValue();
2941     }
2942 
2943     // We handle most of these in the usual way.
2944     return Op;
2945   }
2946 
2947   // If we're comparing for equality to zero, expose the fact that this is
2948   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2949   // fold the new nodes.
2950   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2951     return V;
2952 
2953   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2954     // Leave comparisons against 0 and -1 alone for now, since they're usually
2955     // optimized.  FIXME: revisit this when we can custom lower all setcc
2956     // optimizations.
2957     if (C->isAllOnesValue() || C->isNullValue())
2958       return SDValue();
2959   }
2960 
2961   // If we have an integer seteq/setne, turn it into a compare against zero
2962   // by xor'ing the rhs with the lhs, which is faster than setting a
2963   // condition register, reading it back out, and masking the correct bit.  The
2964   // normal approach here uses sub to do this instead of xor.  Using xor exposes
2965   // the result to other bit-twiddling opportunities.
2966   EVT LHSVT = Op.getOperand(0).getValueType();
2967   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2968     EVT VT = Op.getValueType();
2969     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2970                                 Op.getOperand(1));
2971     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2972   }
2973   return SDValue();
2974 }
2975 
2976 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2977   SDNode *Node = Op.getNode();
2978   EVT VT = Node->getValueType(0);
2979   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2980   SDValue InChain = Node->getOperand(0);
2981   SDValue VAListPtr = Node->getOperand(1);
2982   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2983   SDLoc dl(Node);
2984 
2985   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2986 
2987   // gpr_index
2988   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2989                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
2990   InChain = GprIndex.getValue(1);
2991 
2992   if (VT == MVT::i64) {
2993     // Check if GprIndex is even
2994     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2995                                  DAG.getConstant(1, dl, MVT::i32));
2996     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2997                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2998     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2999                                           DAG.getConstant(1, dl, MVT::i32));
3000     // Align GprIndex to be even if it isn't
3001     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3002                            GprIndex);
3003   }
3004 
3005   // fpr index is 1 byte after gpr
3006   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3007                                DAG.getConstant(1, dl, MVT::i32));
3008 
3009   // fpr
3010   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3011                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
3012   InChain = FprIndex.getValue(1);
3013 
3014   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3015                                        DAG.getConstant(8, dl, MVT::i32));
3016 
3017   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3018                                         DAG.getConstant(4, dl, MVT::i32));
3019 
3020   // areas
3021   SDValue OverflowArea =
3022       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3023   InChain = OverflowArea.getValue(1);
3024 
3025   SDValue RegSaveArea =
3026       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3027   InChain = RegSaveArea.getValue(1);
3028 
3029   // select overflow_area if index > 8
3030   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3031                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3032 
3033   // adjustment constant gpr_index * 4/8
3034   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3035                                     VT.isInteger() ? GprIndex : FprIndex,
3036                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3037                                                     MVT::i32));
3038 
3039   // OurReg = RegSaveArea + RegConstant
3040   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3041                                RegConstant);
3042 
3043   // Floating types are 32 bytes into RegSaveArea
3044   if (VT.isFloatingPoint())
3045     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3046                          DAG.getConstant(32, dl, MVT::i32));
3047 
3048   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3049   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3050                                    VT.isInteger() ? GprIndex : FprIndex,
3051                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3052                                                    MVT::i32));
3053 
3054   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3055                               VT.isInteger() ? VAListPtr : FprPtr,
3056                               MachinePointerInfo(SV), MVT::i8);
3057 
3058   // determine if we should load from reg_save_area or overflow_area
3059   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3060 
3061   // increase overflow_area by 4/8 if gpr/fpr > 8
3062   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3063                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
3064                                           dl, MVT::i32));
3065 
3066   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3067                              OverflowAreaPlusN);
3068 
3069   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3070                               MachinePointerInfo(), MVT::i32);
3071 
3072   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3073 }
3074 
3075 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3076   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3077 
3078   // We have to copy the entire va_list struct:
3079   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3080   return DAG.getMemcpy(Op.getOperand(0), Op,
3081                        Op.getOperand(1), Op.getOperand(2),
3082                        DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
3083                        false, MachinePointerInfo(), MachinePointerInfo());
3084 }
3085 
3086 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3087                                                   SelectionDAG &DAG) const {
3088   return Op.getOperand(0);
3089 }
3090 
3091 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3092                                                 SelectionDAG &DAG) const {
3093   SDValue Chain = Op.getOperand(0);
3094   SDValue Trmp = Op.getOperand(1); // trampoline
3095   SDValue FPtr = Op.getOperand(2); // nested function
3096   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3097   SDLoc dl(Op);
3098 
3099   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3100   bool isPPC64 = (PtrVT == MVT::i64);
3101   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3102 
3103   TargetLowering::ArgListTy Args;
3104   TargetLowering::ArgListEntry Entry;
3105 
3106   Entry.Ty = IntPtrTy;
3107   Entry.Node = Trmp; Args.push_back(Entry);
3108 
3109   // TrampSize == (isPPC64 ? 48 : 40);
3110   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3111                                isPPC64 ? MVT::i64 : MVT::i32);
3112   Args.push_back(Entry);
3113 
3114   Entry.Node = FPtr; Args.push_back(Entry);
3115   Entry.Node = Nest; Args.push_back(Entry);
3116 
3117   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3118   TargetLowering::CallLoweringInfo CLI(DAG);
3119   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3120       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3121       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3122 
3123   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3124   return CallResult.second;
3125 }
3126 
3127 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3128   MachineFunction &MF = DAG.getMachineFunction();
3129   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3130   EVT PtrVT = getPointerTy(MF.getDataLayout());
3131 
3132   SDLoc dl(Op);
3133 
3134   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
3135     // vastart just stores the address of the VarArgsFrameIndex slot into the
3136     // memory location argument.
3137     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3138     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3139     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3140                         MachinePointerInfo(SV));
3141   }
3142 
3143   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3144   // We suppose the given va_list is already allocated.
3145   //
3146   // typedef struct {
3147   //  char gpr;     /* index into the array of 8 GPRs
3148   //                 * stored in the register save area
3149   //                 * gpr=0 corresponds to r3,
3150   //                 * gpr=1 to r4, etc.
3151   //                 */
3152   //  char fpr;     /* index into the array of 8 FPRs
3153   //                 * stored in the register save area
3154   //                 * fpr=0 corresponds to f1,
3155   //                 * fpr=1 to f2, etc.
3156   //                 */
3157   //  char *overflow_arg_area;
3158   //                /* location on stack that holds
3159   //                 * the next overflow argument
3160   //                 */
3161   //  char *reg_save_area;
3162   //               /* where r3:r10 and f1:f8 (if saved)
3163   //                * are stored
3164   //                */
3165   // } va_list[1];
3166 
3167   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3168   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3169   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3170                                             PtrVT);
3171   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3172                                  PtrVT);
3173 
3174   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3175   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3176 
3177   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3178   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3179 
3180   uint64_t FPROffset = 1;
3181   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3182 
3183   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3184 
3185   // Store first byte : number of int regs
3186   SDValue firstStore =
3187       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3188                         MachinePointerInfo(SV), MVT::i8);
3189   uint64_t nextOffset = FPROffset;
3190   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3191                                   ConstFPROffset);
3192 
3193   // Store second byte : number of float regs
3194   SDValue secondStore =
3195       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3196                         MachinePointerInfo(SV, nextOffset), MVT::i8);
3197   nextOffset += StackOffset;
3198   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3199 
3200   // Store second word : arguments given on stack
3201   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3202                                     MachinePointerInfo(SV, nextOffset));
3203   nextOffset += FrameOffset;
3204   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3205 
3206   // Store third word : arguments given in registers
3207   return DAG.getStore(thirdStore, dl, FR, nextPtr,
3208                       MachinePointerInfo(SV, nextOffset));
3209 }
3210 
3211 /// FPR - The set of FP registers that should be allocated for arguments,
3212 /// on Darwin.
3213 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3214                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3215                                 PPC::F11, PPC::F12, PPC::F13};
3216 
3217 /// QFPR - The set of QPX registers that should be allocated for arguments.
3218 static const MCPhysReg QFPR[] = {
3219     PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
3220     PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3221 
3222 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3223 /// the stack.
3224 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3225                                        unsigned PtrByteSize) {
3226   unsigned ArgSize = ArgVT.getStoreSize();
3227   if (Flags.isByVal())
3228     ArgSize = Flags.getByValSize();
3229 
3230   // Round up to multiples of the pointer size, except for array members,
3231   // which are always packed.
3232   if (!Flags.isInConsecutiveRegs())
3233     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3234 
3235   return ArgSize;
3236 }
3237 
3238 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3239 /// on the stack.
3240 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3241                                             ISD::ArgFlagsTy Flags,
3242                                             unsigned PtrByteSize) {
3243   unsigned Align = PtrByteSize;
3244 
3245   // Altivec parameters are padded to a 16 byte boundary.
3246   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3247       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3248       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3249       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3250     Align = 16;
3251   // QPX vector types stored in double-precision are padded to a 32 byte
3252   // boundary.
3253   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3254     Align = 32;
3255 
3256   // ByVal parameters are aligned as requested.
3257   if (Flags.isByVal()) {
3258     unsigned BVAlign = Flags.getByValAlign();
3259     if (BVAlign > PtrByteSize) {
3260       if (BVAlign % PtrByteSize != 0)
3261           llvm_unreachable(
3262             "ByVal alignment is not a multiple of the pointer size");
3263 
3264       Align = BVAlign;
3265     }
3266   }
3267 
3268   // Array members are always packed to their original alignment.
3269   if (Flags.isInConsecutiveRegs()) {
3270     // If the array member was split into multiple registers, the first
3271     // needs to be aligned to the size of the full type.  (Except for
3272     // ppcf128, which is only aligned as its f64 components.)
3273     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3274       Align = OrigVT.getStoreSize();
3275     else
3276       Align = ArgVT.getStoreSize();
3277   }
3278 
3279   return Align;
3280 }
3281 
3282 /// CalculateStackSlotUsed - Return whether this argument will use its
3283 /// stack slot (instead of being passed in registers).  ArgOffset,
3284 /// AvailableFPRs, and AvailableVRs must hold the current argument
3285 /// position, and will be updated to account for this argument.
3286 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3287                                    ISD::ArgFlagsTy Flags,
3288                                    unsigned PtrByteSize,
3289                                    unsigned LinkageSize,
3290                                    unsigned ParamAreaSize,
3291                                    unsigned &ArgOffset,
3292                                    unsigned &AvailableFPRs,
3293                                    unsigned &AvailableVRs, bool HasQPX) {
3294   bool UseMemory = false;
3295 
3296   // Respect alignment of argument on the stack.
3297   unsigned Align =
3298     CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3299   ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3300   // If there's no space left in the argument save area, we must
3301   // use memory (this check also catches zero-sized arguments).
3302   if (ArgOffset >= LinkageSize + ParamAreaSize)
3303     UseMemory = true;
3304 
3305   // Allocate argument on the stack.
3306   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3307   if (Flags.isInConsecutiveRegsLast())
3308     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3309   // If we overran the argument save area, we must use memory
3310   // (this check catches arguments passed partially in memory)
3311   if (ArgOffset > LinkageSize + ParamAreaSize)
3312     UseMemory = true;
3313 
3314   // However, if the argument is actually passed in an FPR or a VR,
3315   // we don't use memory after all.
3316   if (!Flags.isByVal()) {
3317     if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3318         // QPX registers overlap with the scalar FP registers.
3319         (HasQPX && (ArgVT == MVT::v4f32 ||
3320                     ArgVT == MVT::v4f64 ||
3321                     ArgVT == MVT::v4i1)))
3322       if (AvailableFPRs > 0) {
3323         --AvailableFPRs;
3324         return false;
3325       }
3326     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3327         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3328         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3329         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3330       if (AvailableVRs > 0) {
3331         --AvailableVRs;
3332         return false;
3333       }
3334   }
3335 
3336   return UseMemory;
3337 }
3338 
3339 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3340 /// ensure minimum alignment required for target.
3341 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3342                                      unsigned NumBytes) {
3343   unsigned TargetAlign = Lowering->getStackAlignment();
3344   unsigned AlignMask = TargetAlign - 1;
3345   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3346   return NumBytes;
3347 }
3348 
3349 SDValue PPCTargetLowering::LowerFormalArguments(
3350     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3351     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3352     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3353   if (Subtarget.isSVR4ABI()) {
3354     if (Subtarget.isPPC64())
3355       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3356                                          dl, DAG, InVals);
3357     else
3358       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3359                                          dl, DAG, InVals);
3360   } else {
3361     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3362                                        dl, DAG, InVals);
3363   }
3364 }
3365 
3366 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3367     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3368     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3369     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3370 
3371   // 32-bit SVR4 ABI Stack Frame Layout:
3372   //              +-----------------------------------+
3373   //        +-->  |            Back chain             |
3374   //        |     +-----------------------------------+
3375   //        |     | Floating-point register save area |
3376   //        |     +-----------------------------------+
3377   //        |     |    General register save area     |
3378   //        |     +-----------------------------------+
3379   //        |     |          CR save word             |
3380   //        |     +-----------------------------------+
3381   //        |     |         VRSAVE save word          |
3382   //        |     +-----------------------------------+
3383   //        |     |         Alignment padding         |
3384   //        |     +-----------------------------------+
3385   //        |     |     Vector register save area     |
3386   //        |     +-----------------------------------+
3387   //        |     |       Local variable space        |
3388   //        |     +-----------------------------------+
3389   //        |     |        Parameter list area        |
3390   //        |     +-----------------------------------+
3391   //        |     |           LR save word            |
3392   //        |     +-----------------------------------+
3393   // SP-->  +---  |            Back chain             |
3394   //              +-----------------------------------+
3395   //
3396   // Specifications:
3397   //   System V Application Binary Interface PowerPC Processor Supplement
3398   //   AltiVec Technology Programming Interface Manual
3399 
3400   MachineFunction &MF = DAG.getMachineFunction();
3401   MachineFrameInfo &MFI = MF.getFrameInfo();
3402   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3403 
3404   EVT PtrVT = getPointerTy(MF.getDataLayout());
3405   // Potential tail calls could cause overwriting of argument stack slots.
3406   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3407                        (CallConv == CallingConv::Fast));
3408   unsigned PtrByteSize = 4;
3409 
3410   // Assign locations to all of the incoming arguments.
3411   SmallVector<CCValAssign, 16> ArgLocs;
3412   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3413                  *DAG.getContext());
3414 
3415   // Reserve space for the linkage area on the stack.
3416   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3417   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3418   if (useSoftFloat())
3419     CCInfo.PreAnalyzeFormalArguments(Ins);
3420 
3421   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3422   CCInfo.clearWasPPCF128();
3423 
3424   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3425     CCValAssign &VA = ArgLocs[i];
3426 
3427     // Arguments stored in registers.
3428     if (VA.isRegLoc()) {
3429       const TargetRegisterClass *RC;
3430       EVT ValVT = VA.getValVT();
3431 
3432       switch (ValVT.getSimpleVT().SimpleTy) {
3433         default:
3434           llvm_unreachable("ValVT not supported by formal arguments Lowering");
3435         case MVT::i1:
3436         case MVT::i32:
3437           RC = &PPC::GPRCRegClass;
3438           break;
3439         case MVT::f32:
3440           if (Subtarget.hasP8Vector())
3441             RC = &PPC::VSSRCRegClass;
3442           else if (Subtarget.hasSPE())
3443             RC = &PPC::SPE4RCRegClass;
3444           else
3445             RC = &PPC::F4RCRegClass;
3446           break;
3447         case MVT::f64:
3448           if (Subtarget.hasVSX())
3449             RC = &PPC::VSFRCRegClass;
3450           else if (Subtarget.hasSPE())
3451             // SPE passes doubles in GPR pairs.
3452             RC = &PPC::GPRCRegClass;
3453           else
3454             RC = &PPC::F8RCRegClass;
3455           break;
3456         case MVT::v16i8:
3457         case MVT::v8i16:
3458         case MVT::v4i32:
3459           RC = &PPC::VRRCRegClass;
3460           break;
3461         case MVT::v4f32:
3462           RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3463           break;
3464         case MVT::v2f64:
3465         case MVT::v2i64:
3466           RC = &PPC::VRRCRegClass;
3467           break;
3468         case MVT::v4f64:
3469           RC = &PPC::QFRCRegClass;
3470           break;
3471         case MVT::v4i1:
3472           RC = &PPC::QBRCRegClass;
3473           break;
3474       }
3475 
3476       SDValue ArgValue;
3477       // Transform the arguments stored in physical registers into
3478       // virtual ones.
3479       if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3480         assert(i + 1 < e && "No second half of double precision argument");
3481         unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3482         unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3483         SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3484         SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3485         if (!Subtarget.isLittleEndian())
3486           std::swap (ArgValueLo, ArgValueHi);
3487         ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3488                                ArgValueHi);
3489       } else {
3490         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3491         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3492                                       ValVT == MVT::i1 ? MVT::i32 : ValVT);
3493         if (ValVT == MVT::i1)
3494           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3495       }
3496 
3497       InVals.push_back(ArgValue);
3498     } else {
3499       // Argument stored in memory.
3500       assert(VA.isMemLoc());
3501 
3502       // Get the extended size of the argument type in stack
3503       unsigned ArgSize = VA.getLocVT().getStoreSize();
3504       // Get the actual size of the argument type
3505       unsigned ObjSize = VA.getValVT().getStoreSize();
3506       unsigned ArgOffset = VA.getLocMemOffset();
3507       // Stack objects in PPC32 are right justified.
3508       ArgOffset += ArgSize - ObjSize;
3509       int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3510 
3511       // Create load nodes to retrieve arguments from the stack.
3512       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3513       InVals.push_back(
3514           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3515     }
3516   }
3517 
3518   // Assign locations to all of the incoming aggregate by value arguments.
3519   // Aggregates passed by value are stored in the local variable space of the
3520   // caller's stack frame, right above the parameter list area.
3521   SmallVector<CCValAssign, 16> ByValArgLocs;
3522   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3523                       ByValArgLocs, *DAG.getContext());
3524 
3525   // Reserve stack space for the allocations in CCInfo.
3526   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3527 
3528   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3529 
3530   // Area that is at least reserved in the caller of this function.
3531   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3532   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3533 
3534   // Set the size that is at least reserved in caller of this function.  Tail
3535   // call optimized function's reserved stack space needs to be aligned so that
3536   // taking the difference between two stack areas will result in an aligned
3537   // stack.
3538   MinReservedArea =
3539       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3540   FuncInfo->setMinReservedArea(MinReservedArea);
3541 
3542   SmallVector<SDValue, 8> MemOps;
3543 
3544   // If the function takes variable number of arguments, make a frame index for
3545   // the start of the first vararg value... for expansion of llvm.va_start.
3546   if (isVarArg) {
3547     static const MCPhysReg GPArgRegs[] = {
3548       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3549       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3550     };
3551     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3552 
3553     static const MCPhysReg FPArgRegs[] = {
3554       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3555       PPC::F8
3556     };
3557     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3558 
3559     if (useSoftFloat() || hasSPE())
3560        NumFPArgRegs = 0;
3561 
3562     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3563     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3564 
3565     // Make room for NumGPArgRegs and NumFPArgRegs.
3566     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3567                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3568 
3569     FuncInfo->setVarArgsStackOffset(
3570       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3571                             CCInfo.getNextStackOffset(), true));
3572 
3573     FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3574     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3575 
3576     // The fixed integer arguments of a variadic function are stored to the
3577     // VarArgsFrameIndex on the stack so that they may be loaded by
3578     // dereferencing the result of va_next.
3579     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3580       // Get an existing live-in vreg, or add a new one.
3581       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3582       if (!VReg)
3583         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3584 
3585       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3586       SDValue Store =
3587           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3588       MemOps.push_back(Store);
3589       // Increment the address by four for the next argument to store
3590       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3591       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3592     }
3593 
3594     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3595     // is set.
3596     // The double arguments are stored to the VarArgsFrameIndex
3597     // on the stack.
3598     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3599       // Get an existing live-in vreg, or add a new one.
3600       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3601       if (!VReg)
3602         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3603 
3604       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3605       SDValue Store =
3606           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3607       MemOps.push_back(Store);
3608       // Increment the address by eight for the next argument to store
3609       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3610                                          PtrVT);
3611       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3612     }
3613   }
3614 
3615   if (!MemOps.empty())
3616     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3617 
3618   return Chain;
3619 }
3620 
3621 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3622 // value to MVT::i64 and then truncate to the correct register size.
3623 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3624                                              EVT ObjectVT, SelectionDAG &DAG,
3625                                              SDValue ArgVal,
3626                                              const SDLoc &dl) const {
3627   if (Flags.isSExt())
3628     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3629                          DAG.getValueType(ObjectVT));
3630   else if (Flags.isZExt())
3631     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3632                          DAG.getValueType(ObjectVT));
3633 
3634   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3635 }
3636 
/// Lower the incoming formal arguments of a function for the 64-bit SVR4
/// calling convention.
///
/// For every entry of \p Ins (in order) exactly one SDValue is appended to
/// \p InVals. Depending on the argument it is:
///  - a copy out of a live-in argument register (one of the GPRs X3-X10, an
///    entry of the FPR / QFPR tables, one of the vector registers V2-V13, or
///    X11 for a 'nest' argument),
///  - a load from a fixed stack object in the caller's parameter save area
///    when the registers of the matching kind are exhausted, or
///  - for a byval aggregate, the address of the stack object that holds it
///    (pieces that arrived in GPRs are stored to that object first).
///
/// Besides building DAG nodes, the function registers live-in registers on
/// the MachineFunction, creates frame objects in the MachineFrameInfo, and
/// records the minimum reserved area and (for vararg functions) the varargs
/// frame index in PPCFunctionInfo.
///
/// \returns \p Chain unchanged when no stores were generated; otherwise a
/// TokenFactor over all register-to-stack stores created here.
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  // One doubleword: the size of a GPR and of one parameter save area slot.
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  // With soft-float no floating-point register carries an argument.
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);
  // QPX vector arguments are limited by the same count as scalar FP ones.
  const unsigned Num_QFPR_Regs = Num_FPR_Regs;

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  // One doubleword per argument GPR.
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    // 'nest' arguments are not considered when deciding whether a stack
    // slot is used (below they are taken from X11).
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  // QFPR_idx is an alias of FPR_idx: QPX vector arguments and scalar FP
  // arguments advance the very same index.
  unsigned &QFPR_idx = FPR_idx;
  // Stores that spill register-passed pieces to the stack; they are joined
  // into a single TokenFactor at the end.
  SmallVector<SDValue, 8> MemOps;
  // FuncArg follows the IR argument that Ins[ArgNo] originates from.  Below
  // it is only used to build the MachinePointerInfo of byval stores.
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    //
    // CurArgOffset and Align are deliberately left uninitialized here: they
    // are only assigned by ComputeArgOffset(), and every read further down
    // is preceded by a call to it (unconditionally for non-fastcc, and on
    // the stack-slot paths for fastcc).
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      // Round ArgOffset up to the next multiple of Align.
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      // Outside fastcc the GPR index is derived from the slot offset (one
      // GPR per doubleword past the linkage area), clamped to the number of
      // argument GPRs.
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        // Neither ArgOffset nor GPR_idx is advanced for an empty aggregate.
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        // If the aggregate arrived in a GPR, spill it to the stack object so
        // that the address pushed above refers to valid data.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            // Sizes matching an integer type use a truncating store to the
            // (endian-adjusted) object address Arg.
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            // Note the target is FIN (start of the doubleword), not Arg.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(&*FuncArg));
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      // j is the byte offset of the current doubleword within the object;
      // the loop stops early once the argument GPRs are exhausted.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      // The whole (rounded-up) aggregate consumes parameter save area space,
      // regardless of how much of it was passed in registers.
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11 (X11 here).  This
        // path leaves GPR_idx and ArgOffset untouched.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        // The value occupies a full doubleword slot on the stack.
        ArgSize = PtrByteSize;
      }
      // Outside fastcc every integer argument consumes a doubleword of the
      // parameter save area; with fastcc only stack-passed ones do.
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        // The register class of the live-in depends on the value type and on
        // the available vector/VSX features.
        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Two f32 values can share one doubleword.  Depending on
          // endianness and on which half of the doubleword this value's
          // offset selects, the value sits in the upper 32 bits of the GPR
          // and must be shifted down before truncating.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        // Reinterpret the integer bits as the floating-point value.
        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogenous
        // vector aggregates.
        if (VR_idx != Num_VR_Regs) {
          unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++VR_idx;
        } else {
          if (CallConv == CallingConv::Fast)
            ComputeArgOffset();
          needsLoad = true;
        }
        // Each of these arguments accounts for 16 bytes of the parameter
        // save area (for fastcc only when actually passed on the stack).
        if (CallConv != CallingConv::Fast || needsLoad)
          ArgOffset += 16;
        break;
      } // not QPX

      // With QPX only v4f32 may reach this point; it is handled together
      // with the other QPX vector types below.
      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      LLVM_FALLTHROUGH;

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      // Stack footprint: 16 bytes for v4f32, 32 bytes for the other types.
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        // QFPR_idx aliases FPR_idx, so this also consumes a scalar FPR index.
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // On big-endian targets a value smaller than its slot is located at
      // the high-address end of the slot, so skip the leading padding.
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  // With a parameter save area this is never less than the linkage area plus
  // eight doublewords; without one, only the linkage area is counted.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    // The variadic arguments start right after the last fixed argument.
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    // The first GPR to spill is the one corresponding to the doubleword at
    // ArgOffset.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Advance the address by one doubleword (PtrByteSize, i.e. eight bytes)
      // for the next register to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all spill stores together so that later users of the chain depend on
  // every one of them.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4048 
4049 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4050     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4051     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4052     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4053   // TODO: add description of PPC stack frame format, or at least some docs.
4054   //
4055   MachineFunction &MF = DAG.getMachineFunction();
4056   MachineFrameInfo &MFI = MF.getFrameInfo();
4057   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4058 
4059   EVT PtrVT = getPointerTy(MF.getDataLayout());
4060   bool isPPC64 = PtrVT == MVT::i64;
4061   // Potential tail calls could cause overwriting of argument stack slots.
4062   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4063                        (CallConv == CallingConv::Fast));
4064   unsigned PtrByteSize = isPPC64 ? 8 : 4;
4065   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4066   unsigned ArgOffset = LinkageSize;
4067   // Area that is at least reserved in caller of this function.
4068   unsigned MinReservedArea = ArgOffset;
4069 
4070   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
4071     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4072     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4073   };
4074   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
4075     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4076     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4077   };
4078   static const MCPhysReg VR[] = {
4079     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4080     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4081   };
4082 
4083   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4084   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4085   const unsigned Num_VR_Regs  = array_lengthof( VR);
4086 
4087   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4088 
4089   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4090 
4091   // In 32-bit non-varargs functions, the stack space for vectors is after the
4092   // stack space for non-vectors.  We do not use this space unless we have
4093   // too many vectors to fit in registers, something that only occurs in
4094   // constructed examples:), but we have to walk the arglist to figure
4095   // that out...for the pathological case, compute VecArgOffset as the
4096   // start of the vector parameter area.  Computing VecArgOffset is the
4097   // entire point of the following loop.
4098   unsigned VecArgOffset = ArgOffset;
4099   if (!isVarArg && !isPPC64) {
4100     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4101          ++ArgNo) {
4102       EVT ObjectVT = Ins[ArgNo].VT;
4103       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4104 
4105       if (Flags.isByVal()) {
4106         // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4107         unsigned ObjSize = Flags.getByValSize();
4108         unsigned ArgSize =
4109                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4110         VecArgOffset += ArgSize;
4111         continue;
4112       }
4113 
4114       switch(ObjectVT.getSimpleVT().SimpleTy) {
4115       default: llvm_unreachable("Unhandled argument type!");
4116       case MVT::i1:
4117       case MVT::i32:
4118       case MVT::f32:
4119         VecArgOffset += 4;
4120         break;
4121       case MVT::i64:  // PPC64
4122       case MVT::f64:
4123         // FIXME: We are guaranteed to be !isPPC64 at this point.
4124         // Does MVT::i64 apply?
4125         VecArgOffset += 8;
4126         break;
4127       case MVT::v4f32:
4128       case MVT::v4i32:
4129       case MVT::v8i16:
4130       case MVT::v16i8:
4131         // Nothing to do, we're only looking at Nonvector args here.
4132         break;
4133       }
4134     }
4135   }
4136   // We've found where the vector parameter area in memory is.  Skip the
4137   // first 12 parameters; these don't use that memory.
4138   VecArgOffset = ((VecArgOffset+15)/16)*16;
4139   VecArgOffset += 12*16;
4140 
4141   // Add DAG nodes to load the arguments or copy them out of registers.  On
4142   // entry to a function on PPC, the arguments start after the linkage area,
4143   // although the first ones are often in registers.
4144 
4145   SmallVector<SDValue, 8> MemOps;
4146   unsigned nAltivecParamsAtEnd = 0;
4147   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4148   unsigned CurArgIdx = 0;
4149   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4150     SDValue ArgVal;
4151     bool needsLoad = false;
4152     EVT ObjectVT = Ins[ArgNo].VT;
4153     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4154     unsigned ArgSize = ObjSize;
4155     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4156     if (Ins[ArgNo].isOrigArg()) {
4157       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4158       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4159     }
4160     unsigned CurArgOffset = ArgOffset;
4161 
4162     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4163     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4164         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4165       if (isVarArg || isPPC64) {
4166         MinReservedArea = ((MinReservedArea+15)/16)*16;
4167         MinReservedArea += CalculateStackSlotSize(ObjectVT,
4168                                                   Flags,
4169                                                   PtrByteSize);
4170       } else  nAltivecParamsAtEnd++;
4171     } else
4172       // Calculate min reserved area.
4173       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4174                                                 Flags,
4175                                                 PtrByteSize);
4176 
4177     // FIXME the codegen can be much improved in some cases.
4178     // We do not have to keep everything in memory.
4179     if (Flags.isByVal()) {
4180       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4181 
4182       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4183       ObjSize = Flags.getByValSize();
4184       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4185       // Objects of size 1 and 2 are right justified, everything else is
4186       // left justified.  This means the memory address is adjusted forwards.
4187       if (ObjSize==1 || ObjSize==2) {
4188         CurArgOffset = CurArgOffset + (4 - ObjSize);
4189       }
4190       // The value of the object is its address.
4191       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4192       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4193       InVals.push_back(FIN);
4194       if (ObjSize==1 || ObjSize==2) {
4195         if (GPR_idx != Num_GPR_Regs) {
4196           unsigned VReg;
4197           if (isPPC64)
4198             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4199           else
4200             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4201           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4202           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4203           SDValue Store =
4204               DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4205                                 MachinePointerInfo(&*FuncArg), ObjType);
4206           MemOps.push_back(Store);
4207           ++GPR_idx;
4208         }
4209 
4210         ArgOffset += PtrByteSize;
4211 
4212         continue;
4213       }
4214       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4215         // Store whatever pieces of the object are in registers
4216         // to memory.  ArgOffset will be the address of the beginning
4217         // of the object.
4218         if (GPR_idx != Num_GPR_Regs) {
4219           unsigned VReg;
4220           if (isPPC64)
4221             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4222           else
4223             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4224           int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4225           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4226           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4227           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4228                                        MachinePointerInfo(&*FuncArg, j));
4229           MemOps.push_back(Store);
4230           ++GPR_idx;
4231           ArgOffset += PtrByteSize;
4232         } else {
4233           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4234           break;
4235         }
4236       }
4237       continue;
4238     }
4239 
4240     switch (ObjectVT.getSimpleVT().SimpleTy) {
4241     default: llvm_unreachable("Unhandled argument type!");
4242     case MVT::i1:
4243     case MVT::i32:
4244       if (!isPPC64) {
4245         if (GPR_idx != Num_GPR_Regs) {
4246           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4247           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4248 
4249           if (ObjectVT == MVT::i1)
4250             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4251 
4252           ++GPR_idx;
4253         } else {
4254           needsLoad = true;
4255           ArgSize = PtrByteSize;
4256         }
4257         // All int arguments reserve stack space in the Darwin ABI.
4258         ArgOffset += PtrByteSize;
4259         break;
4260       }
4261       LLVM_FALLTHROUGH;
4262     case MVT::i64:  // PPC64
4263       if (GPR_idx != Num_GPR_Regs) {
4264         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4265         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4266 
4267         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4268           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4269           // value to MVT::i64 and then truncate to the correct register size.
4270           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4271 
4272         ++GPR_idx;
4273       } else {
4274         needsLoad = true;
4275         ArgSize = PtrByteSize;
4276       }
4277       // All int arguments reserve stack space in the Darwin ABI.
4278       ArgOffset += 8;
4279       break;
4280 
4281     case MVT::f32:
4282     case MVT::f64:
4283       // Every 4 bytes of argument space consumes one of the GPRs available for
4284       // argument passing.
4285       if (GPR_idx != Num_GPR_Regs) {
4286         ++GPR_idx;
4287         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4288           ++GPR_idx;
4289       }
4290       if (FPR_idx != Num_FPR_Regs) {
4291         unsigned VReg;
4292 
4293         if (ObjectVT == MVT::f32)
4294           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4295         else
4296           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4297 
4298         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4299         ++FPR_idx;
4300       } else {
4301         needsLoad = true;
4302       }
4303 
4304       // All FP arguments reserve stack space in the Darwin ABI.
4305       ArgOffset += isPPC64 ? 8 : ObjSize;
4306       break;
4307     case MVT::v4f32:
4308     case MVT::v4i32:
4309     case MVT::v8i16:
4310     case MVT::v16i8:
4311       // Note that vector arguments in registers don't reserve stack space,
4312       // except in varargs functions.
4313       if (VR_idx != Num_VR_Regs) {
4314         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4315         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4316         if (isVarArg) {
4317           while ((ArgOffset % 16) != 0) {
4318             ArgOffset += PtrByteSize;
4319             if (GPR_idx != Num_GPR_Regs)
4320               GPR_idx++;
4321           }
4322           ArgOffset += 16;
4323           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4324         }
4325         ++VR_idx;
4326       } else {
4327         if (!isVarArg && !isPPC64) {
4328           // Vectors go after all the nonvectors.
4329           CurArgOffset = VecArgOffset;
4330           VecArgOffset += 16;
4331         } else {
4332           // Vectors are aligned.
4333           ArgOffset = ((ArgOffset+15)/16)*16;
4334           CurArgOffset = ArgOffset;
4335           ArgOffset += 16;
4336         }
4337         needsLoad = true;
4338       }
4339       break;
4340     }
4341 
4342     // We need to load the argument to a virtual register if we determined above
4343     // that we ran out of physical registers of the appropriate type.
4344     if (needsLoad) {
4345       int FI = MFI.CreateFixedObject(ObjSize,
4346                                      CurArgOffset + (ArgSize - ObjSize),
4347                                      isImmutable);
4348       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4349       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4350     }
4351 
4352     InVals.push_back(ArgVal);
4353   }
4354 
4355   // Allow for Altivec parameters at the end, if needed.
4356   if (nAltivecParamsAtEnd) {
4357     MinReservedArea = ((MinReservedArea+15)/16)*16;
4358     MinReservedArea += 16*nAltivecParamsAtEnd;
4359   }
4360 
4361   // Area that is at least reserved in the caller of this function.
4362   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4363 
4364   // Set the size that is at least reserved in caller of this function.  Tail
4365   // call optimized functions' reserved stack space needs to be aligned so that
4366   // taking the difference between two stack areas will result in an aligned
4367   // stack.
4368   MinReservedArea =
4369       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4370   FuncInfo->setMinReservedArea(MinReservedArea);
4371 
4372   // If the function takes variable number of arguments, make a frame index for
4373   // the start of the first vararg value... for expansion of llvm.va_start.
4374   if (isVarArg) {
4375     int Depth = ArgOffset;
4376 
4377     FuncInfo->setVarArgsFrameIndex(
4378       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4379                             Depth, true));
4380     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4381 
4382     // If this function is vararg, store any remaining integer argument regs
4383     // to their spots on the stack so that they may be loaded by dereferencing
4384     // the result of va_next.
4385     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4386       unsigned VReg;
4387 
4388       if (isPPC64)
4389         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4390       else
4391         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4392 
4393       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4394       SDValue Store =
4395           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4396       MemOps.push_back(Store);
4397       // Increment the address by four for the next argument to store
4398       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4399       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4400     }
4401   }
4402 
4403   if (!MemOps.empty())
4404     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4405 
4406   return Chain;
4407 }
4408 
4409 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4410 /// adjusted to accommodate the arguments for the tailcall.
4411 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4412                                    unsigned ParamSize) {
4413 
4414   if (!isTailCall) return 0;
4415 
4416   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4417   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4418   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4419   // Remember only if the new adjustment is bigger.
4420   if (SPDiff < FI->getTailCallSPDelta())
4421     FI->setTailCallSPDelta(SPDiff);
4422 
4423   return SPDiff;
4424 }
4425 
4426 static bool isFunctionGlobalAddress(SDValue Callee);
4427 
4428 static bool
4429 callsShareTOCBase(const Function *Caller, SDValue Callee,
4430                     const TargetMachine &TM) {
4431   // Need a GlobalValue to determine if a Caller and Callee share the same
4432   // TOCBase.
4433   const GlobalValue *GV = nullptr;
4434 
4435   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4436     GV = G->getGlobal();
4437   } else if (MCSymbolSDNode *M = dyn_cast<MCSymbolSDNode>(Callee)) {
4438     // On AIX only, we replace GlobalAddressSDNode with MCSymbolSDNode for
4439     // the callee of a direct function call. The MCSymbolSDNode contains the
4440     // MCSymbol for the funtion entry point.
4441     const auto *S = cast<MCSymbolXCOFF>(M->getMCSymbol());
4442     GV = S->getGlobalValue();
4443   }
4444 
4445   // If we failed to get a GlobalValue, then pessimistically assume they do not
4446   // share a TOCBase.
4447   if (!GV)
4448     return false;
4449 
4450   // The medium and large code models are expected to provide a sufficiently
4451   // large TOC to provide all data addressing needs of a module with a
4452   // single TOC. Since each module will be addressed with a single TOC then we
4453   // only need to check that caller and callee don't cross dso boundaries.
4454   if (CodeModel::Medium == TM.getCodeModel() ||
4455       CodeModel::Large == TM.getCodeModel())
4456     return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
4457 
4458   // Otherwise we need to ensure callee and caller are in the same section,
4459   // since the linker may allocate multiple TOCs, and we don't know which
4460   // sections will belong to the same TOC base.
4461 
4462   if (!GV->isStrongDefinitionForLinker())
4463     return false;
4464 
4465   // Any explicitly-specified sections and section prefixes must also match.
4466   // Also, if we're using -ffunction-sections, then each function is always in
4467   // a different section (the same is true for COMDAT functions).
4468   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4469       GV->getSection() != Caller->getSection())
4470     return false;
4471   if (const auto *F = dyn_cast<Function>(GV)) {
4472     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4473       return false;
4474   }
4475 
4476   // If the callee might be interposed, then we can't assume the ultimate call
4477   // target will be in the same section. Even in cases where we can assume that
4478   // interposition won't happen, in any case where the linker might insert a
4479   // stub to allow for interposition, we must generate code as though
4480   // interposition might occur. To understand why this matters, consider a
4481   // situation where: a -> b -> c where the arrows indicate calls. b and c are
4482   // in the same section, but a is in a different module (i.e. has a different
4483   // TOC base pointer). If the linker allows for interposition between b and c,
4484   // then it will generate a stub for the call edge between b and c which will
4485   // save the TOC pointer into the designated stack slot allocated by b. If we
4486   // return true here, and therefore allow a tail call between b and c, that
4487   // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
4488   // pointer into the stack slot allocated by a (where the a -> b stub saved
4489   // a's TOC base pointer). If we're not considering a tail call, but rather,
4490   // whether a nop is needed after the call instruction in b, because the linker
4491   // will insert a stub, it might complain about a missing nop if we omit it
4492   // (although many don't complain in this case).
4493   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4494     return false;
4495 
4496   return true;
4497 }
4498 
4499 static bool
4500 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4501                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4502   assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
4503 
4504   const unsigned PtrByteSize = 8;
4505   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4506 
4507   static const MCPhysReg GPR[] = {
4508     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4509     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4510   };
4511   static const MCPhysReg VR[] = {
4512     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4513     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4514   };
4515 
4516   const unsigned NumGPRs = array_lengthof(GPR);
4517   const unsigned NumFPRs = 13;
4518   const unsigned NumVRs = array_lengthof(VR);
4519   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4520 
4521   unsigned NumBytes = LinkageSize;
4522   unsigned AvailableFPRs = NumFPRs;
4523   unsigned AvailableVRs = NumVRs;
4524 
4525   for (const ISD::OutputArg& Param : Outs) {
4526     if (Param.Flags.isNest()) continue;
4527 
4528     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4529                                PtrByteSize, LinkageSize, ParamAreaSize,
4530                                NumBytes, AvailableFPRs, AvailableVRs,
4531                                Subtarget.hasQPX()))
4532       return true;
4533   }
4534   return false;
4535 }
4536 
4537 static bool
4538 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
4539   if (CS.arg_size() != CallerFn->arg_size())
4540     return false;
4541 
4542   ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
4543   ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
4544   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4545 
4546   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4547     const Value* CalleeArg = *CalleeArgIter;
4548     const Value* CallerArg = &(*CallerArgIter);
4549     if (CalleeArg == CallerArg)
4550       continue;
4551 
4552     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4553     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4554     //      }
4555     // 1st argument of callee is undef and has the same type as caller.
4556     if (CalleeArg->getType() == CallerArg->getType() &&
4557         isa<UndefValue>(CalleeArg))
4558       continue;
4559 
4560     return false;
4561   }
4562 
4563   return true;
4564 }
4565 
4566 // Returns true if TCO is possible between the callers and callees
4567 // calling conventions.
4568 static bool
4569 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4570                                     CallingConv::ID CalleeCC) {
4571   // Tail calls are possible with fastcc and ccc.
4572   auto isTailCallableCC  = [] (CallingConv::ID CC){
4573       return  CC == CallingConv::C || CC == CallingConv::Fast;
4574   };
4575   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4576     return false;
4577 
4578   // We can safely tail call both fastcc and ccc callees from a c calling
4579   // convention caller. If the caller is fastcc, we may have less stack space
4580   // than a non-fastcc caller with the same signature so disable tail-calls in
4581   // that case.
4582   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4583 }
4584 
4585 bool
4586 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4587                                     SDValue Callee,
4588                                     CallingConv::ID CalleeCC,
4589                                     ImmutableCallSite CS,
4590                                     bool isVarArg,
4591                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
4592                                     const SmallVectorImpl<ISD::InputArg> &Ins,
4593                                     SelectionDAG& DAG) const {
4594   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4595 
4596   if (DisableSCO && !TailCallOpt) return false;
4597 
4598   // Variadic argument functions are not supported.
4599   if (isVarArg) return false;
4600 
4601   auto &Caller = DAG.getMachineFunction().getFunction();
4602   // Check that the calling conventions are compatible for tco.
4603   if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4604     return false;
4605 
4606   // Caller contains any byval parameter is not supported.
4607   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4608     return false;
4609 
4610   // Callee contains any byval parameter is not supported, too.
4611   // Note: This is a quick work around, because in some cases, e.g.
4612   // caller's stack size > callee's stack size, we are still able to apply
4613   // sibling call optimization. For example, gcc is able to do SCO for caller1
4614   // in the following example, but not for caller2.
4615   //   struct test {
4616   //     long int a;
4617   //     char ary[56];
4618   //   } gTest;
4619   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4620   //     b->a = v.a;
4621   //     return 0;
4622   //   }
4623   //   void caller1(struct test a, struct test c, struct test *b) {
4624   //     callee(gTest, b); }
4625   //   void caller2(struct test *b) { callee(gTest, b); }
4626   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4627     return false;
4628 
4629   // If callee and caller use different calling conventions, we cannot pass
4630   // parameters on stack since offsets for the parameter area may be different.
4631   if (Caller.getCallingConv() != CalleeCC &&
4632       needStackSlotPassParameters(Subtarget, Outs))
4633     return false;
4634 
4635   // No TCO/SCO on indirect call because Caller have to restore its TOC
4636   if (!isFunctionGlobalAddress(Callee) &&
4637       !isa<ExternalSymbolSDNode>(Callee))
4638     return false;
4639 
4640   // If the caller and callee potentially have different TOC bases then we
4641   // cannot tail call since we need to restore the TOC pointer after the call.
4642   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4643   if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4644     return false;
4645 
4646   // TCO allows altering callee ABI, so we don't have to check further.
4647   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4648     return true;
4649 
4650   if (DisableSCO) return false;
4651 
4652   // If callee use the same argument list that caller is using, then we can
4653   // apply SCO on this case. If it is not, then we need to check if callee needs
4654   // stack for passing arguments.
4655   if (!hasSameArgumentList(&Caller, CS) &&
4656       needStackSlotPassParameters(Subtarget, Outs)) {
4657     return false;
4658   }
4659 
4660   return true;
4661 }
4662 
4663 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4664 /// for tail call optimization. Targets which want to do tail call
4665 /// optimization should implement this function.
4666 bool
4667 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4668                                                      CallingConv::ID CalleeCC,
4669                                                      bool isVarArg,
4670                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4671                                                      SelectionDAG& DAG) const {
4672   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4673     return false;
4674 
4675   // Variable argument functions are not supported.
4676   if (isVarArg)
4677     return false;
4678 
4679   MachineFunction &MF = DAG.getMachineFunction();
4680   CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4681   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4682     // Functions containing by val parameters are not supported.
4683     for (unsigned i = 0; i != Ins.size(); i++) {
4684        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4685        if (Flags.isByVal()) return false;
4686     }
4687 
4688     // Non-PIC/GOT tail calls are supported.
4689     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4690       return true;
4691 
4692     // At the moment we can only do local tail calls (in same module, hidden
4693     // or protected) if we are generating PIC.
4694     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4695       return G->getGlobal()->hasHiddenVisibility()
4696           || G->getGlobal()->hasProtectedVisibility();
4697   }
4698 
4699   return false;
4700 }
4701 
4702 /// isCallCompatibleAddress - Return the immediate to use if the specified
4703 /// 32-bit value is representable in the immediate field of a BxA instruction.
4704 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4705   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4706   if (!C) return nullptr;
4707 
4708   int Addr = C->getZExtValue();
4709   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4710       SignExtend32<26>(Addr) != Addr)
4711     return nullptr;  // Top 6 bits have to be sext of immediate.
4712 
4713   return DAG
4714       .getConstant(
4715           (int)C->getZExtValue() >> 2, SDLoc(Op),
4716           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4717       .getNode();
4718 }
4719 
4720 namespace {
4721 
4722 struct TailCallArgumentInfo {
4723   SDValue Arg;
4724   SDValue FrameIdxOp;
4725   int FrameIdx = 0;
4726 
4727   TailCallArgumentInfo() = default;
4728 };
4729 
4730 } // end anonymous namespace
4731 
4732 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4733 static void StoreTailCallArgumentsToStackSlot(
4734     SelectionDAG &DAG, SDValue Chain,
4735     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4736     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4737   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4738     SDValue Arg = TailCallArgs[i].Arg;
4739     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4740     int FI = TailCallArgs[i].FrameIdx;
4741     // Store relative to framepointer.
4742     MemOpChains.push_back(DAG.getStore(
4743         Chain, dl, Arg, FIN,
4744         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4745   }
4746 }
4747 
4748 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4749 /// the appropriate stack slot for the tail call optimized function call.
4750 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4751                                              SDValue OldRetAddr, SDValue OldFP,
4752                                              int SPDiff, const SDLoc &dl) {
4753   if (SPDiff) {
4754     // Calculate the new stack slot for the return address.
4755     MachineFunction &MF = DAG.getMachineFunction();
4756     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4757     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4758     bool isPPC64 = Subtarget.isPPC64();
4759     int SlotSize = isPPC64 ? 8 : 4;
4760     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4761     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4762                                                          NewRetAddrLoc, true);
4763     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4764     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4765     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4766                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4767 
4768     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4769     // slot as the FP is never overwritten.
4770     if (Subtarget.isDarwinABI()) {
4771       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4772       int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4773                                                          true);
4774       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4775       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4776                            MachinePointerInfo::getFixedStack(
4777                                DAG.getMachineFunction(), NewFPIdx));
4778     }
4779   }
4780   return Chain;
4781 }
4782 
4783 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4784 /// the position of the argument.
4785 static void
4786 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4787                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4788                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4789   int Offset = ArgOffset + SPDiff;
4790   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4791   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4792   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4793   SDValue FIN = DAG.getFrameIndex(FI, VT);
4794   TailCallArgumentInfo Info;
4795   Info.Arg = Arg;
4796   Info.FrameIdxOp = FIN;
4797   Info.FrameIdx = FI;
4798   TailCallArguments.push_back(Info);
4799 }
4800 
4801 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4802 /// stack slot. Returns the chain as result and the loaded frame pointers in
4803 /// LROpOut/FPOpout. Used when tail calling.
4804 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4805     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4806     SDValue &FPOpOut, const SDLoc &dl) const {
4807   if (SPDiff) {
4808     // Load the LR and FP stack slot for later adjusting.
4809     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4810     LROpOut = getReturnAddrFrameIndex(DAG);
4811     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4812     Chain = SDValue(LROpOut.getNode(), 1);
4813 
4814     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4815     // slot as the FP is never overwritten.
4816     if (Subtarget.isDarwinABI()) {
4817       FPOpOut = getFramePointerFrameIndex(DAG);
4818       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4819       Chain = SDValue(FPOpOut.getNode(), 1);
4820     }
4821   }
4822   return Chain;
4823 }
4824 
4825 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4826 /// by "Src" to address "Dst" of size "Size".  Alignment information is
4827 /// specified by the specific parameter attribute. The copy will be passed as
4828 /// a byval function parameter.
4829 /// Sometimes what we are copying is the end of a larger object, the part that
4830 /// does not fit in registers.
4831 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4832                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4833                                          SelectionDAG &DAG, const SDLoc &dl) {
4834   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4835   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4836                        false, false, false, MachinePointerInfo(),
4837                        MachinePointerInfo());
4838 }
4839 
4840 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4841 /// tail calls.
4842 static void LowerMemOpCallTo(
4843     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4844     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4845     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4846     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4847   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4848   if (!isTailCall) {
4849     if (isVector) {
4850       SDValue StackPtr;
4851       if (isPPC64)
4852         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4853       else
4854         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4855       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4856                            DAG.getConstant(ArgOffset, dl, PtrVT));
4857     }
4858     MemOpChains.push_back(
4859         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4860     // Calculate and remember argument location.
4861   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4862                                   TailCallArguments);
4863 }
4864 
4865 static void
4866 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4867                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4868                 SDValue FPOp,
4869                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4870   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4871   // might overwrite each other in case of tail call optimization.
4872   SmallVector<SDValue, 8> MemOpChains2;
4873   // Do not flag preceding copytoreg stuff together with the following stuff.
4874   InFlag = SDValue();
4875   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4876                                     MemOpChains2, dl);
4877   if (!MemOpChains2.empty())
4878     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4879 
4880   // Store the return address to the appropriate stack slot.
4881   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4882 
4883   // Emit callseq_end just before tailcall node.
4884   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4885                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4886   InFlag = Chain.getValue(1);
4887 }
4888 
4889 // Is this global address that of a function that can be called by name? (as
4890 // opposed to something that must hold a descriptor for an indirect call).
4891 static bool isFunctionGlobalAddress(SDValue Callee) {
4892   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4893     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4894         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4895       return false;
4896 
4897     return G->getGlobal()->getValueType()->isFunctionTy();
4898   }
4899 
4900   return false;
4901 }
4902 
4903 static unsigned
4904 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4905             SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4906             bool isPatchPoint, bool hasNest,
4907             SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4908             SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4909             ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
4910   bool isPPC64 = Subtarget.isPPC64();
4911   bool isSVR4ABI = Subtarget.isSVR4ABI();
4912   bool isELFv2ABI = Subtarget.isELFv2ABI();
4913   bool isAIXABI = Subtarget.isAIXABI();
4914 
4915   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4916   NodeTys.push_back(MVT::Other);   // Returns a chain
4917   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
4918 
4919   unsigned CallOpc = PPCISD::CALL;
4920 
4921   bool needIndirectCall = true;
4922   if (!isSVR4ABI || !isPPC64)
4923     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4924       // If this is an absolute destination address, use the munged value.
4925       Callee = SDValue(Dest, 0);
4926       needIndirectCall = false;
4927     }
4928 
4929   // PC-relative references to external symbols should go through $stub, unless
4930   // we're building with the leopard linker or later, which automatically
4931   // synthesizes these stubs.
4932   const TargetMachine &TM = DAG.getTarget();
4933   MachineFunction &MF = DAG.getMachineFunction();
4934   const Module *Mod = MF.getFunction().getParent();
4935   const GlobalValue *GV = nullptr;
4936   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4937     GV = G->getGlobal();
4938   bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4939   bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4940 
4941   if (isFunctionGlobalAddress(Callee)) {
4942     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4943 
4944     if (TM.getTargetTriple().isOSAIX()) {
4945       // Direct function calls reference the symbol for the function's entry
4946       // point, which is named by inserting a "." before the function's
4947       // C-linkage name.
4948       auto &Context = MF.getMMI().getContext();
4949       MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
4950                                               Twine(G->getGlobal()->getName()));
4951       cast<MCSymbolXCOFF>(S)->setGlobalValue(GV);
4952       Callee = DAG.getMCSymbol(S, PtrVT);
4953     } else {
4954       // A call to a TLS address is actually an indirect call to a
4955       // thread-specific pointer.
4956       unsigned OpFlags = 0;
4957       if (UsePlt)
4958         OpFlags = PPCII::MO_PLT;
4959 
4960       // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4961       // every direct call is) turn it into a TargetGlobalAddress /
4962       // TargetExternalSymbol node so that legalize doesn't hack it.
4963       Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4964                                           Callee.getValueType(), 0, OpFlags);
4965     }
4966     needIndirectCall = false;
4967   }
4968 
4969   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4970     unsigned char OpFlags = 0;
4971 
4972     if (UsePlt)
4973       OpFlags = PPCII::MO_PLT;
4974 
4975     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4976                                          OpFlags);
4977     needIndirectCall = false;
4978   }
4979 
4980   if (isPatchPoint) {
4981     // We'll form an invalid direct call when lowering a patchpoint; the full
4982     // sequence for an indirect call is complicated, and many of the
4983     // instructions introduced might have side effects (and, thus, can't be
4984     // removed later). The call itself will be removed as soon as the
4985     // argument/return lowering is complete, so the fact that it has the wrong
4986     // kind of operands should not really matter.
4987     needIndirectCall = false;
4988   }
4989 
4990   if (needIndirectCall) {
4991     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
4992     // to do the call, we can't use PPCISD::CALL.
4993     SDValue MTCTROps[] = {Chain, Callee, InFlag};
4994 
4995     if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4996       // Function pointers in the 64-bit SVR4 ABI do not point to the function
4997       // entry point, but to the function descriptor (the function entry point
4998       // address is part of the function descriptor though).
4999       // The function descriptor is a three doubleword structure with the
5000       // following fields: function entry point, TOC base address and
5001       // environment pointer.
5002       // Thus for a call through a function pointer, the following actions need
5003       // to be performed:
5004       //   1. Save the TOC of the caller in the TOC save area of its stack
5005       //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5006       //   2. Load the address of the function entry point from the function
5007       //      descriptor.
5008       //   3. Load the TOC of the callee from the function descriptor into r2.
5009       //   4. Load the environment pointer from the function descriptor into
5010       //      r11.
5011       //   5. Branch to the function entry point address.
5012       //   6. On return of the callee, the TOC of the caller needs to be
5013       //      restored (this is done in FinishCall()).
5014       //
5015       // The loads are scheduled at the beginning of the call sequence, and the
5016       // register copies are flagged together to ensure that no other
5017       // operations can be scheduled in between. E.g. without flagging the
5018       // copies together, a TOC access in the caller could be scheduled between
5019       // the assignment of the callee TOC and the branch to the callee, which
5020       // results in the TOC access going through the TOC of the callee instead
5021       // of going through the TOC of the caller, which leads to incorrect code.
5022 
5023       // Load the address of the function entry point from the function
5024       // descriptor.
5025       SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
5026       if (LDChain.getValueType() == MVT::Glue)
5027         LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
5028 
5029       auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5030                           ? (MachineMemOperand::MODereferenceable |
5031                              MachineMemOperand::MOInvariant)
5032                           : MachineMemOperand::MONone;
5033 
5034       MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
5035       SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
5036                                         /* Alignment = */ 8, MMOFlags);
5037 
5038       // Load environment pointer into r11.
5039       SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
5040       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
5041       SDValue LoadEnvPtr =
5042           DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
5043                       /* Alignment = */ 8, MMOFlags);
5044 
5045       SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
5046       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
5047       SDValue TOCPtr =
5048           DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
5049                       /* Alignment = */ 8, MMOFlags);
5050 
5051       setUsesTOCBasePtr(DAG);
5052       SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
5053                                         InFlag);
5054       Chain = TOCVal.getValue(0);
5055       InFlag = TOCVal.getValue(1);
5056 
5057       // If the function call has an explicit 'nest' parameter, it takes the
5058       // place of the environment pointer.
5059       if (!hasNest) {
5060         SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
5061                                           InFlag);
5062 
5063         Chain = EnvVal.getValue(0);
5064         InFlag = EnvVal.getValue(1);
5065       }
5066 
5067       MTCTROps[0] = Chain;
5068       MTCTROps[1] = LoadFuncPtr;
5069       MTCTROps[2] = InFlag;
5070     }
5071 
5072     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
5073                         makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
5074     InFlag = Chain.getValue(1);
5075 
5076     NodeTys.clear();
5077     NodeTys.push_back(MVT::Other);
5078     NodeTys.push_back(MVT::Glue);
5079     Ops.push_back(Chain);
5080     CallOpc = PPCISD::BCTRL;
5081     Callee.setNode(nullptr);
5082     // Add use of X11 (holding environment pointer)
5083     if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
5084       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
5085     // Add CTR register as callee so a bctr can be emitted later.
5086     if (isTailCall)
5087       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
5088   }
5089 
5090   // If this is a direct call, pass the chain and the callee.
5091   if (Callee.getNode()) {
5092     Ops.push_back(Chain);
5093     Ops.push_back(Callee);
5094   }
5095   // If this is a tail call add stack pointer delta.
5096   if (isTailCall)
5097     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5098 
5099   // Add argument registers to the end of the list so that they are known live
5100   // into the call.
5101   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5102     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5103                                   RegsToPass[i].second.getValueType()));
5104 
5105   // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
5106   // live into the call.
5107   // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
5108   if ((isSVR4ABI && isPPC64) || isAIXABI) {
5109     setUsesTOCBasePtr(DAG);
5110 
5111     // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5112     // no way to mark dependencies as implicit here.
5113     // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5114     if (!isPatchPoint)
5115       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
5116                                             : PPC::R2, PtrVT));
5117   }
5118 
5119   return CallOpc;
5120 }
5121 
5122 SDValue PPCTargetLowering::LowerCallResult(
5123     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5124     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5125     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5126   SmallVector<CCValAssign, 16> RVLocs;
5127   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5128                     *DAG.getContext());
5129 
5130   CCRetInfo.AnalyzeCallResult(
5131       Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5132                ? RetCC_PPC_Cold
5133                : RetCC_PPC);
5134 
5135   // Copy all of the result registers out of their specified physreg.
5136   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5137     CCValAssign &VA = RVLocs[i];
5138     assert(VA.isRegLoc() && "Can only return in registers!");
5139 
5140     SDValue Val;
5141 
5142     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5143       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5144                                       InFlag);
5145       Chain = Lo.getValue(1);
5146       InFlag = Lo.getValue(2);
5147       VA = RVLocs[++i]; // skip ahead to next loc
5148       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5149                                       InFlag);
5150       Chain = Hi.getValue(1);
5151       InFlag = Hi.getValue(2);
5152       if (!Subtarget.isLittleEndian())
5153         std::swap (Lo, Hi);
5154       Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5155     } else {
5156       Val = DAG.getCopyFromReg(Chain, dl,
5157                                VA.getLocReg(), VA.getLocVT(), InFlag);
5158       Chain = Val.getValue(1);
5159       InFlag = Val.getValue(2);
5160     }
5161 
5162     switch (VA.getLocInfo()) {
5163     default: llvm_unreachable("Unknown loc info!");
5164     case CCValAssign::Full: break;
5165     case CCValAssign::AExt:
5166       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5167       break;
5168     case CCValAssign::ZExt:
5169       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5170                         DAG.getValueType(VA.getValVT()));
5171       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5172       break;
5173     case CCValAssign::SExt:
5174       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5175                         DAG.getValueType(VA.getValVT()));
5176       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5177       break;
5178     }
5179 
5180     InVals.push_back(Val);
5181   }
5182 
5183   return Chain;
5184 }
5185 
5186 SDValue PPCTargetLowering::FinishCall(
5187     CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
5188     bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
5189     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
5190     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5191     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5192     SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
5193   std::vector<EVT> NodeTys;
5194   SmallVector<SDValue, 8> Ops;
5195   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
5196                                  SPDiff, isTailCall, isPatchPoint, hasNest,
5197                                  RegsToPass, Ops, NodeTys, CS, Subtarget);
5198 
5199   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5200   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
5201     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5202 
5203   // When performing tail call optimization the callee pops its arguments off
5204   // the stack. Account for this here so these bytes can be pushed back on in
5205   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5206   int BytesCalleePops =
5207     (CallConv == CallingConv::Fast &&
5208      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
5209 
5210   // Add a register mask operand representing the call-preserved registers.
5211   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5212   const uint32_t *Mask =
5213       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
5214   assert(Mask && "Missing call preserved mask for calling convention");
5215   Ops.push_back(DAG.getRegisterMask(Mask));
5216 
5217   if (InFlag.getNode())
5218     Ops.push_back(InFlag);
5219 
5220   // Emit tail call.
5221   if (isTailCall) {
5222     assert(((Callee.getOpcode() == ISD::Register &&
5223              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5224             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5225             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5226             isa<ConstantSDNode>(Callee)) &&
5227     "Expecting an global address, external symbol, absolute value or register");
5228 
5229     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5230     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
5231   }
5232 
5233   // Add a NOP immediately after the branch instruction when using the 64-bit
5234   // SVR4 or the AIX ABI.
5235   // At link time, if caller and callee are in a different module and
5236   // thus have a different TOC, the call will be replaced with a call to a stub
5237   // function which saves the current TOC, loads the TOC of the callee and
5238   // branches to the callee. The NOP will be replaced with a load instruction
5239   // which restores the TOC of the caller from the TOC save slot of the current
5240   // stack frame. If caller and callee belong to the same module (and have the
5241   // same TOC), the NOP will remain unchanged, or become some other NOP.
5242 
5243   MachineFunction &MF = DAG.getMachineFunction();
5244   if (!isTailCall && !isPatchPoint &&
5245       ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
5246        Subtarget.isAIXABI())) {
5247     if (CallOpc == PPCISD::BCTRL) {
5248       if (Subtarget.isAIXABI())
5249         report_fatal_error("Indirect call on AIX is not implemented.");
5250 
5251       // This is a call through a function pointer.
5252       // Restore the caller TOC from the save area into R2.
5253       // See PrepareCall() for more information about calls through function
5254       // pointers in the 64-bit SVR4 ABI.
5255       // We are using a target-specific load with r2 hard coded, because the
5256       // result of a target-independent load would never go directly into r2,
5257       // since r2 is a reserved register (which prevents the register allocator
5258       // from allocating it), resulting in an additional register being
5259       // allocated and an unnecessary move instruction being generated.
5260       CallOpc = PPCISD::BCTRL_LOAD_TOC;
5261 
5262       EVT PtrVT = getPointerTy(DAG.getDataLayout());
5263       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
5264       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5265       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5266       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5267 
5268       // The address needs to go after the chain input but before the flag (or
5269       // any other variadic arguments).
5270       Ops.insert(std::next(Ops.begin()), AddTOC);
5271     } else if (CallOpc == PPCISD::CALL &&
5272       !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
5273       // Otherwise insert NOP for non-local calls.
5274       CallOpc = PPCISD::CALL_NOP;
5275     }
5276   }
5277 
5278   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5279   InFlag = Chain.getValue(1);
5280 
5281   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5282                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5283                              InFlag, dl);
5284   if (!Ins.empty())
5285     InFlag = Chain.getValue(1);
5286 
5287   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5288                          Ins, dl, DAG, InVals);
5289 }
5290 
5291 SDValue
5292 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5293                              SmallVectorImpl<SDValue> &InVals) const {
5294   SelectionDAG &DAG                     = CLI.DAG;
5295   SDLoc &dl                             = CLI.DL;
5296   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5297   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5298   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5299   SDValue Chain                         = CLI.Chain;
5300   SDValue Callee                        = CLI.Callee;
5301   bool &isTailCall                      = CLI.IsTailCall;
5302   CallingConv::ID CallConv              = CLI.CallConv;
5303   bool isVarArg                         = CLI.IsVarArg;
5304   bool isPatchPoint                     = CLI.IsPatchPoint;
5305   ImmutableCallSite CS                  = CLI.CS;
5306 
5307   if (isTailCall) {
5308     if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5309       isTailCall = false;
5310     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5311       isTailCall =
5312         IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5313                                                  isVarArg, Outs, Ins, DAG);
5314     else
5315       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5316                                                      Ins, DAG);
5317     if (isTailCall) {
5318       ++NumTailCalls;
5319       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5320         ++NumSiblingCalls;
5321 
5322       assert(isa<GlobalAddressSDNode>(Callee) &&
5323              "Callee should be an llvm::Function object.");
5324       LLVM_DEBUG(
5325           const GlobalValue *GV =
5326               cast<GlobalAddressSDNode>(Callee)->getGlobal();
5327           const unsigned Width =
5328               80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5329           dbgs() << "TCO caller: "
5330                  << left_justify(DAG.getMachineFunction().getName(), Width)
5331                  << ", callee linkage: " << GV->getVisibility() << ", "
5332                  << GV->getLinkage() << "\n");
5333     }
5334   }
5335 
5336   if (!isTailCall && CS && CS.isMustTailCall())
5337     report_fatal_error("failed to perform tail call elimination on a call "
5338                        "site marked musttail");
5339 
5340   // When long calls (i.e. indirect calls) are always used, calls are always
5341   // made via function pointer. If we have a function name, first translate it
5342   // into a pointer.
5343   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5344       !isTailCall)
5345     Callee = LowerGlobalAddress(Callee, DAG);
5346 
5347   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5348     return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5349                             isTailCall, isPatchPoint, Outs, OutVals, Ins,
5350                             dl, DAG, InVals, CS);
5351 
5352   if (Subtarget.isSVR4ABI())
5353     return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5354                             isTailCall, isPatchPoint, Outs, OutVals, Ins,
5355                             dl, DAG, InVals, CS);
5356 
5357   if (Subtarget.isAIXABI())
5358     return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
5359                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
5360                          dl, DAG, InVals, CS);
5361 
5362   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5363                           isTailCall, isPatchPoint, Outs, OutVals, Ins,
5364                           dl, DAG, InVals, CS);
5365 }
5366 
5367 SDValue PPCTargetLowering::LowerCall_32SVR4(
5368     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5369     bool isTailCall, bool isPatchPoint,
5370     const SmallVectorImpl<ISD::OutputArg> &Outs,
5371     const SmallVectorImpl<SDValue> &OutVals,
5372     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5373     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5374     ImmutableCallSite CS) const {
5375   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5376   // of the 32-bit SVR4 ABI stack frame layout.
5377 
5378   assert((CallConv == CallingConv::C ||
5379           CallConv == CallingConv::Cold ||
5380           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5381 
5382   unsigned PtrByteSize = 4;
5383 
5384   MachineFunction &MF = DAG.getMachineFunction();
5385 
5386   // Mark this function as potentially containing a function that contains a
5387   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5388   // and restoring the callers stack pointer in this functions epilog. This is
5389   // done because by tail calling the called function might overwrite the value
5390   // in this function's (MF) stack pointer stack slot 0(SP).
5391   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5392       CallConv == CallingConv::Fast)
5393     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5394 
5395   // Count how many bytes are to be pushed on the stack, including the linkage
5396   // area, parameter list area and the part of the local variable space which
5397   // contains copies of aggregates which are passed by value.
5398 
5399   // Assign locations to all of the outgoing arguments.
5400   SmallVector<CCValAssign, 16> ArgLocs;
5401   PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5402 
5403   // Reserve space for the linkage area on the stack.
5404   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5405                        PtrByteSize);
5406   if (useSoftFloat())
5407     CCInfo.PreAnalyzeCallOperands(Outs);
5408 
5409   if (isVarArg) {
5410     // Handle fixed and variable vector arguments differently.
5411     // Fixed vector arguments go into registers as long as registers are
5412     // available. Variable vector arguments always go into memory.
5413     unsigned NumArgs = Outs.size();
5414 
5415     for (unsigned i = 0; i != NumArgs; ++i) {
5416       MVT ArgVT = Outs[i].VT;
5417       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5418       bool Result;
5419 
5420       if (Outs[i].IsFixed) {
5421         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5422                                CCInfo);
5423       } else {
5424         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5425                                       ArgFlags, CCInfo);
5426       }
5427 
5428       if (Result) {
5429 #ifndef NDEBUG
5430         errs() << "Call operand #" << i << " has unhandled type "
5431              << EVT(ArgVT).getEVTString() << "\n";
5432 #endif
5433         llvm_unreachable(nullptr);
5434       }
5435     }
5436   } else {
5437     // All arguments are treated the same.
5438     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5439   }
5440   CCInfo.clearWasPPCF128();
5441 
5442   // Assign locations to all of the outgoing aggregate by value arguments.
5443   SmallVector<CCValAssign, 16> ByValArgLocs;
5444   CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5445 
5446   // Reserve stack space for the allocations in CCInfo.
5447   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5448 
5449   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5450 
5451   // Size of the linkage area, parameter list area and the part of the local
5452   // space variable where copies of aggregates which are passed by value are
5453   // stored.
5454   unsigned NumBytes = CCByValInfo.getNextStackOffset();
5455 
5456   // Calculate by how many bytes the stack has to be adjusted in case of tail
5457   // call optimization.
5458   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5459 
5460   // Adjust the stack pointer for the new arguments...
5461   // These operations are automatically eliminated by the prolog/epilog pass
5462   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5463   SDValue CallSeqStart = Chain;
5464 
5465   // Load the return address and frame pointer so it can be moved somewhere else
5466   // later.
5467   SDValue LROp, FPOp;
5468   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5469 
5470   // Set up a copy of the stack pointer for use loading and storing any
5471   // arguments that may not fit in the registers available for argument
5472   // passing.
5473   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5474 
5475   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5476   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5477   SmallVector<SDValue, 8> MemOpChains;
5478 
5479   bool seenFloatArg = false;
5480   // Walk the register/memloc assignments, inserting copies/loads.
5481   // i - Tracks the index into the list of registers allocated for the call
5482   // RealArgIdx - Tracks the index into the list of actual function arguments
5483   // j - Tracks the index into the list of byval arguments
5484   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5485        i != e;
5486        ++i, ++RealArgIdx) {
5487     CCValAssign &VA = ArgLocs[i];
5488     SDValue Arg = OutVals[RealArgIdx];
5489     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5490 
5491     if (Flags.isByVal()) {
5492       // Argument is an aggregate which is passed by value, thus we need to
5493       // create a copy of it in the local variable space of the current stack
5494       // frame (which is the stack frame of the caller) and pass the address of
5495       // this copy to the callee.
5496       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5497       CCValAssign &ByValVA = ByValArgLocs[j++];
5498       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5499 
5500       // Memory reserved in the local variable space of the callers stack frame.
5501       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5502 
5503       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5504       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5505                            StackPtr, PtrOff);
5506 
5507       // Create a copy of the argument in the local area of the current
5508       // stack frame.
5509       SDValue MemcpyCall =
5510         CreateCopyOfByValArgument(Arg, PtrOff,
5511                                   CallSeqStart.getNode()->getOperand(0),
5512                                   Flags, DAG, dl);
5513 
5514       // This must go outside the CALLSEQ_START..END.
5515       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5516                                                      SDLoc(MemcpyCall));
5517       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5518                              NewCallSeqStart.getNode());
5519       Chain = CallSeqStart = NewCallSeqStart;
5520 
5521       // Pass the address of the aggregate copy on the stack either in a
5522       // physical register or in the parameter list area of the current stack
5523       // frame to the callee.
5524       Arg = PtrOff;
5525     }
5526 
5527     // When useCRBits() is true, there can be i1 arguments.
5528     // It is because getRegisterType(MVT::i1) => MVT::i1,
5529     // and for other integer types getRegisterType() => MVT::i32.
5530     // Extend i1 and ensure callee will get i32.
5531     if (Arg.getValueType() == MVT::i1)
5532       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5533                         dl, MVT::i32, Arg);
5534 
5535     if (VA.isRegLoc()) {
5536       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5537       // Put argument in a physical register.
5538       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5539         bool IsLE = Subtarget.isLittleEndian();
5540         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5541                         DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5542         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5543         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5544                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5545         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5546                              SVal.getValue(0)));
5547       } else
5548         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5549     } else {
5550       // Put argument in the parameter list area of the current stack frame.
5551       assert(VA.isMemLoc());
5552       unsigned LocMemOffset = VA.getLocMemOffset();
5553 
5554       if (!isTailCall) {
5555         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5556         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5557                              StackPtr, PtrOff);
5558 
5559         MemOpChains.push_back(
5560             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5561       } else {
5562         // Calculate and remember argument location.
5563         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5564                                  TailCallArguments);
5565       }
5566     }
5567   }
5568 
5569   if (!MemOpChains.empty())
5570     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5571 
5572   // Build a sequence of copy-to-reg nodes chained together with token chain
5573   // and flag operands which copy the outgoing args into the appropriate regs.
5574   SDValue InFlag;
5575   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5576     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5577                              RegsToPass[i].second, InFlag);
5578     InFlag = Chain.getValue(1);
5579   }
5580 
5581   // Set CR bit 6 to true if this is a vararg call with floating args passed in
5582   // registers.
5583   if (isVarArg) {
5584     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5585     SDValue Ops[] = { Chain, InFlag };
5586 
5587     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5588                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5589 
5590     InFlag = Chain.getValue(1);
5591   }
5592 
5593   if (isTailCall)
5594     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5595                     TailCallArguments);
5596 
5597   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5598                     /* unused except on PPC64 ELFv1 */ false, DAG,
5599                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5600                     NumBytes, Ins, InVals, CS);
5601 }
5602 
5603 // Copy an argument into memory, being careful to do this outside the
5604 // call sequence for the call to which the argument belongs.
5605 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5606     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5607     SelectionDAG &DAG, const SDLoc &dl) const {
5608   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5609                         CallSeqStart.getNode()->getOperand(0),
5610                         Flags, DAG, dl);
5611   // The MEMCPY must go outside the CALLSEQ_START..END.
5612   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5613   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5614                                                  SDLoc(MemcpyCall));
5615   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5616                          NewCallSeqStart.getNode());
5617   return NewCallSeqStart;
5618 }
5619 
// Lower an outgoing call for the 64-bit SVR4 ABI (ELFv1 or ELFv2, selected by
// Subtarget.isELFv2ABI()).
//
// The function proceeds in three phases:
//   1. Size the outgoing frame: the linkage area plus, when one is needed,
//      the parameter area (at least 8 doublewords once it exists).
//   2. Walk the outgoing arguments and place each one in GPRs, FPRs, VRs/QPX
//      registers and/or a stack slot.  Register copies are collected in
//      RegsToPass; memory operations in MemOpChains / TailCallArguments.
//   3. For indirect calls, store X2 to the TOC save slot; then emit the
//      glued copy-to-reg nodes and hand everything to FinishCall(), whose
//      result is returned.
//
// Outs/OutVals describe the outgoing arguments (flags/types and values,
// indexed in parallel); Ins/InVals/CS are forwarded to FinishCall().
5620 SDValue PPCTargetLowering::LowerCall_64SVR4(
5621     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5622     bool isTailCall, bool isPatchPoint,
5623     const SmallVectorImpl<ISD::OutputArg> &Outs,
5624     const SmallVectorImpl<SDValue> &OutVals,
5625     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5626     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5627     ImmutableCallSite CS) const {
5628   bool isELFv2ABI = Subtarget.isELFv2ABI();
5629   bool isLittleEndian = Subtarget.isLittleEndian();
5630   unsigned NumOps = Outs.size();
5631   bool hasNest = false;
5632   bool IsSibCall = false;
5633
5634   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5635   unsigned PtrByteSize = 8;
5636
5637   MachineFunction &MF = DAG.getMachineFunction();
5638
       // Without GuaranteedTailCallOpt, a tail call is lowered as a sibling call.
       // Further down, sibling calls skip CALLSEQ_START, the SPDiff computation
       // and PrepareTailCall().
5639   if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5640     IsSibCall = true;
5641
5642   // Mark this function as potentially containing a function that contains a
5643   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5644   // and restoring the caller's stack pointer in this function's epilog. This is
5645   // done because by tail calling the called function might overwrite the value
5646   // in this function's (MF) stack pointer stack slot 0(SP).
5647   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5648       CallConv == CallingConv::Fast)
5649     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5650
5651   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5652          "fastcc not supported on varargs functions");
5653
5654   // Count how many bytes are to be pushed on the stack, including the linkage
5655   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5656   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5657   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5658   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5659   unsigned NumBytes = LinkageSize;
5660   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
       // QFPR_idx is a reference to FPR_idx, so QPX vector arguments and scalar
       // floating-point arguments advance the same register counter.
5661   unsigned &QFPR_idx = FPR_idx;
5662
       // Register tables indexed by GPR_idx / VR_idx below.  The FPR[] and QFPR[]
       // tables used later are not declared in this function.
5663   static const MCPhysReg GPR[] = {
5664     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5665     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5666   };
5667   static const MCPhysReg VR[] = {
5668     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5669     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5670   };
5671
       // With soft-float no FPRs (and hence no QPX registers) are used for
       // argument passing.
5672   const unsigned NumGPRs = array_lengthof(GPR);
5673   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5674   const unsigned NumVRs  = array_lengthof(VR);
5675   const unsigned NumQFPRs = NumFPRs;
5676
5677   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5678   // can be passed to the callee in registers.
5679   // For the fast calling convention, there is another check below.
5680   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5681   bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5682   if (!HasParameterArea) {
         // Dry run over the (non-nest) arguments using scratch counters: as soon
         // as CalculateStackSlotUsed() returns true for one of them, the
         // parameter area is required.
5683     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5684     unsigned AvailableFPRs = NumFPRs;
5685     unsigned AvailableVRs = NumVRs;
5686     unsigned NumBytesTmp = NumBytes;
5687     for (unsigned i = 0; i != NumOps; ++i) {
5688       if (Outs[i].Flags.isNest()) continue;
5689       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5690                                 PtrByteSize, LinkageSize, ParamAreaSize,
5691                                 NumBytesTmp, AvailableFPRs, AvailableVRs,
5692                                 Subtarget.hasQPX()))
5693         HasParameterArea = true;
5694     }
5695   }
5696
5697   // When using the fast calling convention, we don't provide backing for
5698   // arguments that will be in registers.
5699   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5700
5701   // Avoid allocating parameter area for fastcc functions if all the arguments
5702   // can be passed in the registers.
5703   if (CallConv == CallingConv::Fast)
5704     HasParameterArea = false;
5705
5706   // Add up all the space actually used.
5707   for (unsigned i = 0; i != NumOps; ++i) {
5708     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5709     EVT ArgVT = Outs[i].VT;
5710     EVT OrigVT = Outs[i].ArgVT;
5711
         // The nest argument never takes parameter-area space.
5712     if (Flags.isNest())
5713       continue;
5714
         // Under fastcc, a non-byval argument that still fits in a register of its
         // class takes the `continue` below and contributes nothing to NumBytes.
         // Everything else (including every byval argument) falls through to the
         // size accumulation at the bottom of the loop.
5715     if (CallConv == CallingConv::Fast) {
5716       if (Flags.isByVal()) {
5717         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5718         if (NumGPRsUsed > NumGPRs)
5719           HasParameterArea = true;
5720       } else {
5721         switch (ArgVT.getSimpleVT().SimpleTy) {
5722         default: llvm_unreachable("Unexpected ValueType for argument!");
5723         case MVT::i1:
5724         case MVT::i32:
5725         case MVT::i64:
5726           if (++NumGPRsUsed <= NumGPRs)
5727             continue;
5728           break;
5729         case MVT::v4i32:
5730         case MVT::v8i16:
5731         case MVT::v16i8:
5732         case MVT::v2f64:
5733         case MVT::v2i64:
5734         case MVT::v1i128:
5735         case MVT::f128:
5736           if (++NumVRsUsed <= NumVRs)
5737             continue;
5738           break;
5739         case MVT::v4f32:
5740           // When using QPX, this is handled like a FP register, otherwise, it
5741           // is an Altivec register.
5742           if (Subtarget.hasQPX()) {
5743             if (++NumFPRsUsed <= NumFPRs)
5744               continue;
5745           } else {
5746             if (++NumVRsUsed <= NumVRs)
5747               continue;
5748           }
5749           break;
5750         case MVT::f32:
5751         case MVT::f64:
5752         case MVT::v4f64: // QPX
5753         case MVT::v4i1:  // QPX
5754           if (++NumFPRsUsed <= NumFPRs)
5755             continue;
5756           break;
5757         }
             // Reached only via `break`, i.e. the register class is exhausted.
5758         HasParameterArea = true;
5759       }
5760     }
5761
5762     /* Respect alignment of argument on the stack.  */
5763     unsigned Align =
5764       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5765     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5766
5767     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
         // The last element of a consecutive-register group rounds the running
         // size up to a whole doubleword.
5768     if (Flags.isInConsecutiveRegsLast())
5769       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5770   }
5771
       // Snapshot taken before the minimum-size and alignment adjustments below;
       // it is compared against ArgOffset in the assert after the argument loop.
5772   unsigned NumBytesActuallyUsed = NumBytes;
5773
5774   // In the old ELFv1 ABI,
5775   // the prolog code of the callee may store up to 8 GPR argument registers to
5776   // the stack, allowing va_start to index over them in memory if it's varargs.
5777   // Because we cannot tell if this is needed on the caller side, we have to
5778   // conservatively assume that it is needed.  As such, make sure we have at
5779   // least enough stack space for the caller to store the 8 GPRs.
5780   // In the ELFv2 ABI, we allocate the parameter area iff a callee
5781   // really requires memory operands, e.g. a vararg function.
5782   if (HasParameterArea)
5783     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5784   else
5785     NumBytes = LinkageSize;
5786
5787   // Tail call needs the stack to be aligned.
5788   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5789       CallConv == CallingConv::Fast)
5790     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5791
5792   int SPDiff = 0;
5793
5794   // Calculate by how many bytes the stack has to be adjusted in case of tail
5795   // call optimization.
5796   if (!IsSibCall)
5797     SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5798
5799   // To protect arguments on the stack from being clobbered in a tail call,
5800   // force all the loads to happen before doing any other lowering.
5801   if (isTailCall)
5802     Chain = DAG.getStackArgumentTokenFactor(Chain);
5803
5804   // Adjust the stack pointer for the new arguments...
5805   // These operations are automatically eliminated by the prolog/epilog pass
5806   if (!IsSibCall)
5807     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
       // For a sibling call this is simply the incoming chain, since no
       // CALLSEQ_START node was created above.
5808   SDValue CallSeqStart = Chain;
5809
5810   // Load the return address and frame pointer so they can be moved somewhere
5811   // else later.
5812   SDValue LROp, FPOp;
5813   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5814
5815   // Set up a copy of the stack pointer for use loading and storing any
5816   // arguments that may not fit in the registers available for argument
5817   // passing.
5818   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5819
5820   // Figure out which arguments are going to go in registers, and which in
5821   // memory.  Also, if this is a vararg function, floating point operations
5822   // must be stored to our stack, and loaded into integer regs as well, if
5823   // any integer regs are available for argument passing.
5824   unsigned ArgOffset = LinkageSize;
5825
5826   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5827   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5828
5829   SmallVector<SDValue, 8> MemOpChains;
5830   for (unsigned i = 0; i != NumOps; ++i) {
5831     SDValue Arg = OutVals[i];
5832     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5833     EVT ArgVT = Outs[i].VT;
5834     EVT OrigVT = Outs[i].ArgVT;
5835
5836     // PtrOff will be used to store the current argument to the stack if a
5837     // register cannot be found for it.
5838     SDValue PtrOff;
5839
5840     // We re-align the argument offset for each argument, except when using the
5841     // fast calling convention, when we need to make sure we do that only when
5842     // we'll actually use a stack slot.
         // The lambda updates both ArgOffset (aligned in place) and PtrOff
         // (StackPtr + ArgOffset) through its by-reference capture.
5843     auto ComputePtrOff = [&]() {
5844       /* Respect alignment of argument on the stack.  */
5845       unsigned Align =
5846         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5847       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5848
5849       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5850
5851       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5852     };
5853
5854     if (CallConv != CallingConv::Fast) {
5855       ComputePtrOff();
5856
5857       /* Compute GPR index associated with argument offset.  */
5858       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5859       GPR_idx = std::min(GPR_idx, NumGPRs);
5860     }
5861
5862     // Promote integers to 64-bit values.
5863     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5864       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5865       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5866       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5867     }
5868
5869     // FIXME memcpy is used way more than necessary.  Correctness first.
5870     // Note: "by value" is code for passing a structure by value, not
5871     // basic types.
5872     if (Flags.isByVal()) {
5873       // Note: Size includes alignment padding, so
5874       //   struct x { short a; char b; }
5875       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5876       // These are the proper values we need for right-justifying the
5877       // aggregate in a parameter register.
5878       unsigned Size = Flags.getByValSize();
5879
5880       // An empty aggregate parameter takes up no storage and no
5881       // registers.
5882       if (Size == 0)
5883         continue;
5884
5885       if (CallConv == CallingConv::Fast)
5886         ComputePtrOff();
5887
5888       // All aggregates smaller than 8 bytes must be passed right-justified.
           // Sizes 1, 2 and 4 can be fetched with a single extending load straight
           // into a GPR when one is free.
5889       if (Size==1 || Size==2 || Size==4) {
5890         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5891         if (GPR_idx != NumGPRs) {
5892           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5893                                         MachinePointerInfo(), VT);
5894           MemOpChains.push_back(Load.getValue(1));
5895           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5896
5897           ArgOffset += PtrByteSize;
5898           continue;
5899         }
5900       }
5901
           // No GPR left for a small aggregate: copy it into its stack doubleword,
           // offset by (PtrByteSize - Size) on big-endian targets.
5902       if (GPR_idx == NumGPRs && Size < 8) {
5903         SDValue AddPtr = PtrOff;
5904         if (!isLittleEndian) {
5905           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5906                                           PtrOff.getValueType());
5907           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5908         }
5909         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5910                                                           CallSeqStart,
5911                                                           Flags, DAG, dl);
5912         ArgOffset += PtrByteSize;
5913         continue;
5914       }
5915       // Copy entire object into memory.  There are cases where gcc-generated
5916       // code assumes it is there, even if it could be put entirely into
5917       // registers.  (This is not what the doc says.)
5918
5919       // FIXME: The above statement is likely due to a misunderstanding of the
5920       // documents.  All arguments must be copied into the parameter area BY
5921       // THE CALLEE in the event that the callee takes the address of any
5922       // formal argument.  That has not yet been implemented.  However, it is
5923       // reasonable to use the stack area as a staging area for the register
5924       // load.
5925
5926       // Skip this for small aggregates, as we will use the same slot for a
5927       // right-justified copy, below.
5928       if (Size >= 8)
5929         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5930                                                           CallSeqStart,
5931                                                           Flags, DAG, dl);
5932
5933       // When a register is available, pass a small aggregate right-justified.
5934       if (Size < 8 && GPR_idx != NumGPRs) {
5935         // The easiest way to get this right-justified in a register
5936         // is to copy the structure into the rightmost portion of a
5937         // local variable slot, then load the whole slot into the
5938         // register.
5939         // FIXME: The memcpy seems to produce pretty awful code for
5940         // small aggregates, particularly for packed ones.
5941         // FIXME: It would be preferable to use the slot in the
5942         // parameter save area instead of a new local variable.
5943         SDValue AddPtr = PtrOff;
5944         if (!isLittleEndian) {
5945           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5946           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5947         }
5948         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5949                                                           CallSeqStart,
5950                                                           Flags, DAG, dl);
5951
5952         // Load the slot into the register.
5953         SDValue Load =
5954             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5955         MemOpChains.push_back(Load.getValue(1));
5956         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5957
5958         // Done with this argument.
5959         ArgOffset += PtrByteSize;
5960         continue;
5961       }
5962
5963       // For aggregates larger than PtrByteSize, copy the pieces of the
5964       // object that fit into registers from the parameter save area.
           // Note that the loads below read from Arg + j, i.e. from the source
           // object.  Once the GPRs run out, ArgOffset is advanced past the
           // remaining (doubleword-rounded) bytes in one step.
5965       for (unsigned j=0; j<Size; j+=PtrByteSize) {
5966         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5967         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5968         if (GPR_idx != NumGPRs) {
5969           SDValue Load =
5970               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5971           MemOpChains.push_back(Load.getValue(1));
5972           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5973           ArgOffset += PtrByteSize;
5974         } else {
5975           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5976           break;
5977         }
5978       }
5979       continue;
5980     }
5981
         // Non-byval arguments: dispatch on the (possibly promoted) value type.
5982     switch (Arg.getSimpleValueType().SimpleTy) {
5983     default: llvm_unreachable("Unexpected ValueType for argument!");
5984     case MVT::i1:
5985     case MVT::i32:
5986     case MVT::i64:
5987       if (Flags.isNest()) {
5988         // The 'nest' parameter, if any, is passed in R11.
5989         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5990         hasNest = true;
5991         break;
5992       }
5993
5994       // These can be scalar arguments or elements of an integer array type
5995       // passed directly.  Clang may use those instead of "byval" aggregate
5996       // types to avoid forcing arguments to memory unnecessarily.
5997       if (GPR_idx != NumGPRs) {
5998         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5999       } else {
6000         if (CallConv == CallingConv::Fast)
6001           ComputePtrOff();
6002
6003         assert(HasParameterArea &&
6004                "Parameter area must exist to pass an argument in memory.");
6005         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6006                          true, isTailCall, false, MemOpChains,
6007                          TailCallArguments, dl);
             // Under fastcc only arguments that really go to memory consume
             // parameter-area space.
6008         if (CallConv == CallingConv::Fast)
6009           ArgOffset += PtrByteSize;
6010       }
6011       if (CallConv != CallingConv::Fast)
6012         ArgOffset += PtrByteSize;
6013       break;
6014     case MVT::f32:
6015     case MVT::f64: {
6016       // These can be scalar arguments or elements of a float array type
6017       // passed directly.  The latter are used to implement ELFv2 homogeneous
6018       // float aggregates.
6019
6020       // Named arguments go into FPRs first, and once they overflow, the
6021       // remaining arguments go into GPRs and then the parameter save area.
6022       // Unnamed arguments for vararg functions always go to GPRs and
6023       // then the parameter save area.  For now, put all arguments to vararg
6024       // routines always in both locations (FPR *and* GPR or stack slot).
6025       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
           // Set when the value is written to a stack slot below; under fastcc it
           // decides whether ArgOffset is advanced for this argument.
6026       bool NeededLoad = false;
6027
6028       // First load the argument into the next available FPR.
6029       if (FPR_idx != NumFPRs)
6030         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6031
6032       // Next, load the argument into GPR or stack slot if needed.
6033       if (!NeedGPROrStack)
6034         ;
6035       else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
6036         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6037         // once we support fp <-> gpr moves.
6038
6039         // In the non-vararg case, this can only ever happen in the
6040         // presence of f32 array types, since otherwise we never run
6041         // out of FPRs before running out of GPRs.
6042         SDValue ArgVal;
6043
6044         // Double values are always passed in a single GPR.
6045         if (Arg.getValueType() != MVT::f32) {
6046           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6047
6048         // Non-array float values are extended and passed in a GPR.
6049         } else if (!Flags.isInConsecutiveRegs()) {
6050           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6051           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6052
6053         // If we have an array of floats, we collect every odd element
6054         // together with its predecessor into one GPR.
6055         } else if (ArgOffset % PtrByteSize != 0) {
6056           SDValue Lo, Hi;
6057           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6058           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6059           if (!isLittleEndian)
6060             std::swap(Lo, Hi);
6061           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6062
6063         // The final element, if even, goes into the first half of a GPR.
6064         } else if (Flags.isInConsecutiveRegsLast()) {
6065           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6066           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6067           if (!isLittleEndian)
6068             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6069                                  DAG.getConstant(32, dl, MVT::i32));
6070
6071         // Non-final even elements are skipped; they will be handled
6072         // together with the subsequent argument on the next go-around.
6073         } else
6074           ArgVal = SDValue();
6075
             // A null ArgVal means "nothing to pass for this element yet".
6076         if (ArgVal.getNode())
6077           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6078       } else {
6079         if (CallConv == CallingConv::Fast)
6080           ComputePtrOff();
6081
6082         // Single-precision floating-point values are mapped to the
6083         // second (rightmost) word of the stack doubleword.
6084         if (Arg.getValueType() == MVT::f32 &&
6085             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6086           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6087           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6088         }
6089
6090         assert(HasParameterArea &&
6091                "Parameter area must exist to pass an argument in memory.");
6092         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6093                          true, isTailCall, false, MemOpChains,
6094                          TailCallArguments, dl);
6095
6096         NeededLoad = true;
6097       }
6098       // When passing an array of floats, the array occupies consecutive
6099       // space in the argument area; only round up to the next doubleword
6100       // at the end of the array.  Otherwise, each float takes 8 bytes.
6101       if (CallConv != CallingConv::Fast || NeededLoad) {
6102         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6103                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6104         if (Flags.isInConsecutiveRegsLast())
6105           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6106       }
6107       break;
6108     }
6109     case MVT::v4f32:
6110     case MVT::v4i32:
6111     case MVT::v8i16:
6112     case MVT::v16i8:
6113     case MVT::v2f64:
6114     case MVT::v2i64:
6115     case MVT::v1i128:
6116     case MVT::f128:
           // Without QPX these types are handled here and the case ends with a
           // `break`.  With QPX, control skips this block, asserts the type is
           // v4f32 and falls through to the QPX handling below.
6117       if (!Subtarget.hasQPX()) {
6118       // These can be scalar arguments or elements of a vector array type
6119       // passed directly.  The latter are used to implement ELFv2 homogeneous
6120       // vector aggregates.
6121
6122       // For a varargs call, named arguments go into VRs or on the stack as
6123       // usual; unnamed arguments always go to the stack or the corresponding
6124       // GPRs when within range.  For now, we always put the value in both
6125       // locations (or even all three).
6126       if (isVarArg) {
6127         assert(HasParameterArea &&
6128                "Parameter area must exist if we have a varargs call.");
6129         // We could elide this store in the case where the object fits
6130         // entirely in R registers.  Maybe later.
6131         SDValue Store =
6132             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6133         MemOpChains.push_back(Store);
6134         if (VR_idx != NumVRs) {
               // NOTE(review): the reload is always typed v4f32, whatever vector
               // type Arg has -- presumably intentional since only the bits matter
               // for the register copy; confirm.
6135           SDValue Load =
6136               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6137           MemOpChains.push_back(Load.getValue(1));
6138           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6139         }
6140         ArgOffset += 16;
             // NOTE(review): this `i` shadows the outer argument index.  It is
             // harmless here because the case breaks right after the loop.
6141         for (unsigned i=0; i<16; i+=PtrByteSize) {
6142           if (GPR_idx == NumGPRs)
6143             break;
6144           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6145                                    DAG.getConstant(i, dl, PtrVT));
6146           SDValue Load =
6147               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6148           MemOpChains.push_back(Load.getValue(1));
6149           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6150         }
6151         break;
6152       }
6153
6154       // Non-varargs Altivec params go into VRs or on the stack.
6155       if (VR_idx != NumVRs) {
6156         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6157       } else {
6158         if (CallConv == CallingConv::Fast)
6159           ComputePtrOff();
6160
6161         assert(HasParameterArea &&
6162                "Parameter area must exist to pass an argument in memory.");
6163         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6164                          true, isTailCall, true, MemOpChains,
6165                          TailCallArguments, dl);
6166         if (CallConv == CallingConv::Fast)
6167           ArgOffset += 16;
6168       }
6169
6170       if (CallConv != CallingConv::Fast)
6171         ArgOffset += 16;
6172       break;
6173       } // not QPX
6174
6175       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
6176              "Invalid QPX parameter type");
6177
6178       LLVM_FALLTHROUGH;
6179     case MVT::v4f64:
6180     case MVT::v4i1: {
           // QPX vectors: v4f32 occupies 16 bytes of parameter area, the other
           // types 32 bytes (see the ArgOffset updates below).
6181       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
6182       if (isVarArg) {
6183         assert(HasParameterArea &&
6184                "Parameter area must exist if we have a varargs call.");
6185         // We could elide this store in the case where the object fits
6186         // entirely in R registers.  Maybe later.
6187         SDValue Store =
6188             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6189         MemOpChains.push_back(Store);
6190         if (QFPR_idx != NumQFPRs) {
6191           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
6192                                      PtrOff, MachinePointerInfo());
6193           MemOpChains.push_back(Load.getValue(1));
6194           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
6195         }
6196         ArgOffset += (IsF32 ? 16 : 32);
             // NOTE(review): this `i` shadows the outer argument index.  It is
             // harmless here because the case breaks right after the loop.
6197         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
6198           if (GPR_idx == NumGPRs)
6199             break;
6200           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6201                                    DAG.getConstant(i, dl, PtrVT));
6202           SDValue Load =
6203               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6204           MemOpChains.push_back(Load.getValue(1));
6205           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6206         }
6207         break;
6208       }
6209
6210       // Non-varargs QPX params go into registers or on the stack.
6211       if (QFPR_idx != NumQFPRs) {
6212         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
6213       } else {
6214         if (CallConv == CallingConv::Fast)
6215           ComputePtrOff();
6216
6217         assert(HasParameterArea &&
6218                "Parameter area must exist to pass an argument in memory.");
6219         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6220                          true, isTailCall, true, MemOpChains,
6221                          TailCallArguments, dl);
6222         if (CallConv == CallingConv::Fast)
6223           ArgOffset += (IsF32 ? 16 : 32);
6224       }
6225
6226       if (CallConv != CallingConv::Fast)
6227         ArgOffset += (IsF32 ? 16 : 32);
6228       break;
6229       }
6230     }
6231   }
6232
       // Sanity check: when a parameter area exists, the placement loop above must
       // have consumed exactly the number of bytes computed by the sizing loop.
       // The (void) cast silences the unused-variable warning when asserts are
       // compiled out.
6233   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6234          "mismatch in size of parameter area");
6235   (void)NumBytesActuallyUsed;
6236
       // Merge all pending loads/stores into a single chain.
6237   if (!MemOpChains.empty())
6238     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6239
6240   // Check if this is an indirect call (MTCTR/BCTRL).
6241   // See PrepareCall() for more information about calls through function
6242   // pointers in the 64-bit SVR4 ABI.
6243   if (!isTailCall && !isPatchPoint &&
6244       !isFunctionGlobalAddress(Callee) &&
6245       !isa<ExternalSymbolSDNode>(Callee)) {
6246     // Load r2 into a virtual register and store it to the TOC save area.
6247     setUsesTOCBasePtr(DAG);
6248     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6249     // TOC save area offset.
6250     unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6251     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6252     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6253     Chain = DAG.getStore(
6254         Val.getValue(1), dl, Val, AddPtr,
6255         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
6256     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6257     // This does not mean the MTCTR instruction must use R12; it's easier
6258     // to model this as an extra parameter, so do that.
         // NOTE(review): `!isPatchPoint` is already guaranteed by the enclosing
         // condition, so the second test here is redundant.
6259     if (isELFv2ABI && !isPatchPoint)
6260       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6261   }
6262
6263   // Build a sequence of copy-to-reg nodes chained together with token chain
6264   // and flag operands which copy the outgoing args into the appropriate regs.
6265   SDValue InFlag;
6266   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6267     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6268                              RegsToPass[i].second, InFlag);
6269     InFlag = Chain.getValue(1);
6270   }
6271
       // Sibling calls skip PrepareTailCall().
6272   if (isTailCall && !IsSibCall)
6273     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6274                     TailCallArguments);
6275
6276   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
6277                     DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
6278                     SPDiff, NumBytes, Ins, InVals, CS);
6279 }
6280 
6281 SDValue PPCTargetLowering::LowerCall_Darwin(
6282     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6283     bool isTailCall, bool isPatchPoint,
6284     const SmallVectorImpl<ISD::OutputArg> &Outs,
6285     const SmallVectorImpl<SDValue> &OutVals,
6286     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6287     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6288     ImmutableCallSite CS) const {
6289   unsigned NumOps = Outs.size();
6290 
6291   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6292   bool isPPC64 = PtrVT == MVT::i64;
6293   unsigned PtrByteSize = isPPC64 ? 8 : 4;
6294 
6295   MachineFunction &MF = DAG.getMachineFunction();
6296 
6297   // Mark this function as potentially containing a function that contains a
6298   // tail call. As a consequence the frame pointer will be used for dynamicalloc
6299   // and restoring the callers stack pointer in this functions epilog. This is
6300   // done because by tail calling the called function might overwrite the value
6301   // in this function's (MF) stack pointer stack slot 0(SP).
6302   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6303       CallConv == CallingConv::Fast)
6304     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6305 
6306   // Count how many bytes are to be pushed on the stack, including the linkage
6307   // area, and parameter passing area.  We start with 24/48 bytes, which is
6308   // prereserved space for [SP][CR][LR][3 x unused].
6309   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6310   unsigned NumBytes = LinkageSize;
6311 
6312   // Add up all the space actually used.
6313   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6314   // they all go in registers, but we must reserve stack space for them for
6315   // possible use by the caller.  In varargs or 64-bit calls, parameters are
6316   // assigned stack space in order, with padding so Altivec parameters are
6317   // 16-byte aligned.
6318   unsigned nAltivecParamsAtEnd = 0;
6319   for (unsigned i = 0; i != NumOps; ++i) {
6320     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6321     EVT ArgVT = Outs[i].VT;
6322     // Varargs Altivec parameters are padded to a 16 byte boundary.
6323     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6324         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6325         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6326       if (!isVarArg && !isPPC64) {
6327         // Non-varargs Altivec parameters go after all the non-Altivec
6328         // parameters; handle those later so we know how much padding we need.
6329         nAltivecParamsAtEnd++;
6330         continue;
6331       }
6332       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6333       NumBytes = ((NumBytes+15)/16)*16;
6334     }
6335     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6336   }
6337 
6338   // Allow for Altivec parameters at the end, if needed.
6339   if (nAltivecParamsAtEnd) {
6340     NumBytes = ((NumBytes+15)/16)*16;
6341     NumBytes += 16*nAltivecParamsAtEnd;
6342   }
6343 
6344   // The prolog code of the callee may store up to 8 GPR argument registers to
6345   // the stack, allowing va_start to index over them in memory if its varargs.
6346   // Because we cannot tell if this is needed on the caller side, we have to
6347   // conservatively assume that it is needed.  As such, make sure we have at
6348   // least enough stack space for the caller to store the 8 GPRs.
6349   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6350 
6351   // Tail call needs the stack to be aligned.
6352   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6353       CallConv == CallingConv::Fast)
6354     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6355 
6356   // Calculate by how many bytes the stack has to be adjusted in case of tail
6357   // call optimization.
6358   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
6359 
6360   // To protect arguments on the stack from being clobbered in a tail call,
6361   // force all the loads to happen before doing any other lowering.
6362   if (isTailCall)
6363     Chain = DAG.getStackArgumentTokenFactor(Chain);
6364 
6365   // Adjust the stack pointer for the new arguments...
6366   // These operations are automatically eliminated by the prolog/epilog pass
6367   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6368   SDValue CallSeqStart = Chain;
6369 
6370   // Load the return address and frame pointer so it can be move somewhere else
6371   // later.
6372   SDValue LROp, FPOp;
6373   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6374 
6375   // Set up a copy of the stack pointer for use loading and storing any
6376   // arguments that may not fit in the registers available for argument
6377   // passing.
6378   SDValue StackPtr;
6379   if (isPPC64)
6380     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6381   else
6382     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6383 
6384   // Figure out which arguments are going to go in registers, and which in
6385   // memory.  Also, if this is a vararg function, floating point operations
6386   // must be stored to our stack, and loaded into integer regs as well, if
6387   // any integer regs are available for argument passing.
6388   unsigned ArgOffset = LinkageSize;
6389   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6390 
6391   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
6392     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6393     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6394   };
6395   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
6396     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6397     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6398   };
6399   static const MCPhysReg VR[] = {
6400     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6401     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6402   };
6403   const unsigned NumGPRs = array_lengthof(GPR_32);
6404   const unsigned NumFPRs = 13;
6405   const unsigned NumVRs  = array_lengthof(VR);
6406 
6407   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6408 
6409   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6410   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6411 
6412   SmallVector<SDValue, 8> MemOpChains;
6413   for (unsigned i = 0; i != NumOps; ++i) {
6414     SDValue Arg = OutVals[i];
6415     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6416 
6417     // PtrOff will be used to store the current argument to the stack if a
6418     // register cannot be found for it.
6419     SDValue PtrOff;
6420 
6421     PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6422 
6423     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6424 
6425     // On PPC64, promote integers to 64-bit values.
6426     if (isPPC64 && Arg.getValueType() == MVT::i32) {
6427       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6428       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6429       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6430     }
6431 
6432     // FIXME memcpy is used way more than necessary.  Correctness first.
6433     // Note: "by value" is code for passing a structure by value, not
6434     // basic types.
6435     if (Flags.isByVal()) {
6436       unsigned Size = Flags.getByValSize();
6437       // Very small objects are passed right-justified.  Everything else is
6438       // passed left-justified.
6439       if (Size==1 || Size==2) {
6440         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6441         if (GPR_idx != NumGPRs) {
6442           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6443                                         MachinePointerInfo(), VT);
6444           MemOpChains.push_back(Load.getValue(1));
6445           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6446 
6447           ArgOffset += PtrByteSize;
6448         } else {
6449           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6450                                           PtrOff.getValueType());
6451           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6452           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6453                                                             CallSeqStart,
6454                                                             Flags, DAG, dl);
6455           ArgOffset += PtrByteSize;
6456         }
6457         continue;
6458       }
6459       // Copy entire object into memory.  There are cases where gcc-generated
6460       // code assumes it is there, even if it could be put entirely into
6461       // registers.  (This is not what the doc says.)
6462       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6463                                                         CallSeqStart,
6464                                                         Flags, DAG, dl);
6465 
6466       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6467       // copy the pieces of the object that fit into registers from the
6468       // parameter save area.
6469       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6470         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6471         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6472         if (GPR_idx != NumGPRs) {
6473           SDValue Load =
6474               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6475           MemOpChains.push_back(Load.getValue(1));
6476           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6477           ArgOffset += PtrByteSize;
6478         } else {
6479           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6480           break;
6481         }
6482       }
6483       continue;
6484     }
6485 
6486     switch (Arg.getSimpleValueType().SimpleTy) {
6487     default: llvm_unreachable("Unexpected ValueType for argument!");
6488     case MVT::i1:
6489     case MVT::i32:
6490     case MVT::i64:
6491       if (GPR_idx != NumGPRs) {
6492         if (Arg.getValueType() == MVT::i1)
6493           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6494 
6495         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6496       } else {
6497         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6498                          isPPC64, isTailCall, false, MemOpChains,
6499                          TailCallArguments, dl);
6500       }
6501       ArgOffset += PtrByteSize;
6502       break;
6503     case MVT::f32:
6504     case MVT::f64:
6505       if (FPR_idx != NumFPRs) {
6506         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6507 
6508         if (isVarArg) {
6509           SDValue Store =
6510               DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6511           MemOpChains.push_back(Store);
6512 
6513           // Float varargs are always shadowed in available integer registers
6514           if (GPR_idx != NumGPRs) {
6515             SDValue Load =
6516                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6517             MemOpChains.push_back(Load.getValue(1));
6518             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6519           }
6520           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6521             SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6522             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6523             SDValue Load =
6524                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6525             MemOpChains.push_back(Load.getValue(1));
6526             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6527           }
6528         } else {
6529           // If we have any FPRs remaining, we may also have GPRs remaining.
6530           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6531           // GPRs.
6532           if (GPR_idx != NumGPRs)
6533             ++GPR_idx;
6534           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6535               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
6536             ++GPR_idx;
6537         }
6538       } else
6539         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6540                          isPPC64, isTailCall, false, MemOpChains,
6541                          TailCallArguments, dl);
6542       if (isPPC64)
6543         ArgOffset += 8;
6544       else
6545         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6546       break;
6547     case MVT::v4f32:
6548     case MVT::v4i32:
6549     case MVT::v8i16:
6550     case MVT::v16i8:
6551       if (isVarArg) {
6552         // These go aligned on the stack, or in the corresponding R registers
6553         // when within range.  The Darwin PPC ABI doc claims they also go in
6554         // V registers; in fact gcc does this only for arguments that are
6555         // prototyped, not for those that match the ...  We do it for all
6556         // arguments, seems to work.
6557         while (ArgOffset % 16 !=0) {
6558           ArgOffset += PtrByteSize;
6559           if (GPR_idx != NumGPRs)
6560             GPR_idx++;
6561         }
6562         // We could elide this store in the case where the object fits
6563         // entirely in R registers.  Maybe later.
6564         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6565                              DAG.getConstant(ArgOffset, dl, PtrVT));
6566         SDValue Store =
6567             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6568         MemOpChains.push_back(Store);
6569         if (VR_idx != NumVRs) {
6570           SDValue Load =
6571               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6572           MemOpChains.push_back(Load.getValue(1));
6573           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6574         }
6575         ArgOffset += 16;
6576         for (unsigned i=0; i<16; i+=PtrByteSize) {
6577           if (GPR_idx == NumGPRs)
6578             break;
6579           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6580                                    DAG.getConstant(i, dl, PtrVT));
6581           SDValue Load =
6582               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6583           MemOpChains.push_back(Load.getValue(1));
6584           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6585         }
6586         break;
6587       }
6588 
6589       // Non-varargs Altivec params generally go in registers, but have
6590       // stack space allocated at the end.
6591       if (VR_idx != NumVRs) {
6592         // Doesn't have GPR space allocated.
6593         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6594       } else if (nAltivecParamsAtEnd==0) {
6595         // We are emitting Altivec params in order.
6596         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6597                          isPPC64, isTailCall, true, MemOpChains,
6598                          TailCallArguments, dl);
6599         ArgOffset += 16;
6600       }
6601       break;
6602     }
6603   }
6604   // If all Altivec parameters fit in registers, as they usually do,
6605   // they get stack space following the non-Altivec parameters.  We
6606   // don't track this here because nobody below needs it.
6607   // If there are more Altivec parameters than fit in registers emit
6608   // the stores here.
6609   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
6610     unsigned j = 0;
6611     // Offset is aligned; skip 1st 12 params which go in V registers.
6612     ArgOffset = ((ArgOffset+15)/16)*16;
6613     ArgOffset += 12*16;
6614     for (unsigned i = 0; i != NumOps; ++i) {
6615       SDValue Arg = OutVals[i];
6616       EVT ArgType = Outs[i].VT;
6617       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6618           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6619         if (++j > NumVRs) {
6620           SDValue PtrOff;
6621           // We are emitting Altivec params in order.
6622           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6623                            isPPC64, isTailCall, true, MemOpChains,
6624                            TailCallArguments, dl);
6625           ArgOffset += 16;
6626         }
6627       }
6628     }
6629   }
6630 
6631   if (!MemOpChains.empty())
6632     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6633 
6634   // On Darwin, R12 must contain the address of an indirect callee.  This does
6635   // not mean the MTCTR instruction must use R12; it's easier to model this as
6636   // an extra parameter, so do that.
6637   if (!isTailCall &&
6638       !isFunctionGlobalAddress(Callee) &&
6639       !isa<ExternalSymbolSDNode>(Callee) &&
6640       !isBLACompatibleAddress(Callee, DAG))
6641     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6642                                                    PPC::R12), Callee));
6643 
6644   // Build a sequence of copy-to-reg nodes chained together with token chain
6645   // and flag operands which copy the outgoing args into the appropriate regs.
6646   SDValue InFlag;
6647   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6648     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6649                              RegsToPass[i].second, InFlag);
6650     InFlag = Chain.getValue(1);
6651   }
6652 
6653   if (isTailCall)
6654     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6655                     TailCallArguments);
6656 
6657   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6658                     /* unused except on PPC64 ELFv1 */ false, DAG,
6659                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6660                     NumBytes, Ins, InVals, CS);
6661 }
6662 
6663 
6664 SDValue PPCTargetLowering::LowerCall_AIX(
6665     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6666     bool isTailCall, bool isPatchPoint,
6667     const SmallVectorImpl<ISD::OutputArg> &Outs,
6668     const SmallVectorImpl<SDValue> &OutVals,
6669     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6670     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6671     ImmutableCallSite CS) const {
6672 
6673   assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
6674          "Unimplemented calling convention!");
6675   if (isVarArg || isPatchPoint)
6676     report_fatal_error("This call type is unimplemented on AIX.");
6677 
6678   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6679   bool isPPC64 = PtrVT == MVT::i64;
6680   unsigned PtrByteSize = isPPC64 ? 8 : 4;
6681   unsigned NumOps = Outs.size();
6682 
6683 
6684   // Count how many bytes are to be pushed on the stack, including the linkage
6685   // area, parameter list area.
6686   // On XCOFF, we start with 24/48, which is reserved space for
6687   // [SP][CR][LR][2 x reserved][TOC].
6688   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6689 
6690   // The prolog code of the callee may store up to 8 GPR argument registers to
6691   // the stack, allowing va_start to index over them in memory if the callee
6692   // is variadic.
6693   // Because we cannot tell if this is needed on the caller side, we have to
6694   // conservatively assume that it is needed.  As such, make sure we have at
6695   // least enough stack space for the caller to store the 8 GPRs.
6696   unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
6697 
6698   // Adjust the stack pointer for the new arguments...
6699   // These operations are automatically eliminated by the prolog/epilog
6700   // inserter pass.
6701   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6702   SDValue CallSeqStart = Chain;
6703 
6704   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
6705     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6706     PPC::R7, PPC::R8, PPC::R9, PPC::R10
6707   };
6708   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
6709     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6710     PPC::X7, PPC::X8, PPC::X9, PPC::X10
6711   };
6712 
6713   const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
6714                                    : array_lengthof(GPR_32);
6715   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6716   unsigned GPR_idx = 0;
6717 
6718   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6719 
6720   if (isTailCall)
6721     report_fatal_error("Handling of tail call is unimplemented!");
6722   int SPDiff = 0;
6723 
6724   for (unsigned i = 0; i != NumOps; ++i) {
6725     SDValue Arg = OutVals[i];
6726     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6727 
6728     // Promote integers if needed.
6729     if (Arg.getValueType() == MVT::i1 ||
6730         (isPPC64 && Arg.getValueType() == MVT::i32)) {
6731       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6732       Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
6733     }
6734 
6735     // Note: "by value" is code for passing a structure by value, not
6736     // basic types.
6737     if (Flags.isByVal())
6738       report_fatal_error("Passing structure by value is unimplemented!");
6739 
6740     switch (Arg.getSimpleValueType().SimpleTy) {
6741     default: llvm_unreachable("Unexpected ValueType for argument!");
6742     case MVT::i1:
6743     case MVT::i32:
6744     case MVT::i64:
6745       if (GPR_idx != NumGPRs)
6746         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6747       else
6748         report_fatal_error("Handling of placing parameters on the stack is "
6749                            "unimplemented!");
6750       break;
6751     case MVT::f32:
6752     case MVT::f64:
6753     case MVT::v4f32:
6754     case MVT::v4i32:
6755     case MVT::v8i16:
6756     case MVT::v16i8:
6757     case MVT::v2f64:
6758     case MVT::v2i64:
6759     case MVT::v1i128:
6760     case MVT::f128:
6761     case MVT::v4f64:
6762     case MVT::v4i1:
6763       report_fatal_error("Handling of this parameter type is unimplemented!");
6764     }
6765   }
6766 
6767   if (!isFunctionGlobalAddress(Callee) &&
6768       !isa<ExternalSymbolSDNode>(Callee))
6769     report_fatal_error("Handling of indirect call is unimplemented!");
6770 
6771   // Build a sequence of copy-to-reg nodes chained together with token chain
6772   // and flag operands which copy the outgoing args into the appropriate regs.
6773   SDValue InFlag;
6774   for (auto Reg : RegsToPass) {
6775     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
6776     InFlag = Chain.getValue(1);
6777   }
6778 
6779   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6780                     /* unused except on PPC64 ELFv1 */ false, DAG,
6781                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6782                     NumBytes, Ins, InVals, CS);
6783 }
6784 
6785 bool
6786 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6787                                   MachineFunction &MF, bool isVarArg,
6788                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
6789                                   LLVMContext &Context) const {
6790   SmallVector<CCValAssign, 16> RVLocs;
6791   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6792   return CCInfo.CheckReturn(
6793       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6794                 ? RetCC_PPC_Cold
6795                 : RetCC_PPC);
6796 }
6797 
6798 SDValue
6799 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6800                                bool isVarArg,
6801                                const SmallVectorImpl<ISD::OutputArg> &Outs,
6802                                const SmallVectorImpl<SDValue> &OutVals,
6803                                const SDLoc &dl, SelectionDAG &DAG) const {
6804   SmallVector<CCValAssign, 16> RVLocs;
6805   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6806                  *DAG.getContext());
6807   CCInfo.AnalyzeReturn(Outs,
6808                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6809                            ? RetCC_PPC_Cold
6810                            : RetCC_PPC);
6811 
6812   SDValue Flag;
6813   SmallVector<SDValue, 4> RetOps(1, Chain);
6814 
6815   // Copy the result values into the output registers.
6816   for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
6817     CCValAssign &VA = RVLocs[i];
6818     assert(VA.isRegLoc() && "Can only return in registers!");
6819 
6820     SDValue Arg = OutVals[RealResIdx];
6821 
6822     switch (VA.getLocInfo()) {
6823     default: llvm_unreachable("Unknown loc info!");
6824     case CCValAssign::Full: break;
6825     case CCValAssign::AExt:
6826       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6827       break;
6828     case CCValAssign::ZExt:
6829       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6830       break;
6831     case CCValAssign::SExt:
6832       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6833       break;
6834     }
6835     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
6836       bool isLittleEndian = Subtarget.isLittleEndian();
6837       // Legalize ret f64 -> ret 2 x i32.
6838       SDValue SVal =
6839           DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6840                       DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
6841       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
6842       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6843       SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6844                          DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
6845       Flag = Chain.getValue(1);
6846       VA = RVLocs[++i]; // skip ahead to next loc
6847       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
6848     } else
6849       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6850     Flag = Chain.getValue(1);
6851     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6852   }
6853 
6854   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6855   const MCPhysReg *I =
6856     TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6857   if (I) {
6858     for (; *I; ++I) {
6859 
6860       if (PPC::G8RCRegClass.contains(*I))
6861         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6862       else if (PPC::F8RCRegClass.contains(*I))
6863         RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6864       else if (PPC::CRRCRegClass.contains(*I))
6865         RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6866       else if (PPC::VRRCRegClass.contains(*I))
6867         RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6868       else
6869         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6870     }
6871   }
6872 
6873   RetOps[0] = Chain;  // Update chain.
6874 
6875   // Add the flag if we have it.
6876   if (Flag.getNode())
6877     RetOps.push_back(Flag);
6878 
6879   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6880 }
6881 
6882 SDValue
6883 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6884                                                 SelectionDAG &DAG) const {
6885   SDLoc dl(Op);
6886 
6887   // Get the correct type for integers.
6888   EVT IntVT = Op.getValueType();
6889 
6890   // Get the inputs.
6891   SDValue Chain = Op.getOperand(0);
6892   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6893   // Build a DYNAREAOFFSET node.
6894   SDValue Ops[2] = {Chain, FPSIdx};
6895   SDVTList VTs = DAG.getVTList(IntVT);
6896   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6897 }
6898 
6899 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6900                                              SelectionDAG &DAG) const {
6901   // When we pop the dynamic allocation we need to restore the SP link.
6902   SDLoc dl(Op);
6903 
6904   // Get the correct type for pointers.
6905   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6906 
6907   // Construct the stack pointer operand.
6908   bool isPPC64 = Subtarget.isPPC64();
6909   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6910   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6911 
6912   // Get the operands for the STACKRESTORE.
6913   SDValue Chain = Op.getOperand(0);
6914   SDValue SaveSP = Op.getOperand(1);
6915 
6916   // Load the old link SP.
6917   SDValue LoadLinkSP =
6918       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6919 
6920   // Restore the stack pointer.
6921   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6922 
6923   // Store the old link SP.
6924   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6925 }
6926 
6927 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6928   MachineFunction &MF = DAG.getMachineFunction();
6929   bool isPPC64 = Subtarget.isPPC64();
6930   EVT PtrVT = getPointerTy(MF.getDataLayout());
6931 
6932   // Get current frame pointer save index.  The users of this index will be
6933   // primarily DYNALLOC instructions.
6934   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6935   int RASI = FI->getReturnAddrSaveIndex();
6936 
6937   // If the frame pointer save index hasn't been defined yet.
6938   if (!RASI) {
6939     // Find out what the fix offset of the frame pointer save area.
6940     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6941     // Allocate the frame index for frame pointer save area.
6942     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6943     // Save the result.
6944     FI->setReturnAddrSaveIndex(RASI);
6945   }
6946   return DAG.getFrameIndex(RASI, PtrVT);
6947 }
6948 
6949 SDValue
6950 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6951   MachineFunction &MF = DAG.getMachineFunction();
6952   bool isPPC64 = Subtarget.isPPC64();
6953   EVT PtrVT = getPointerTy(MF.getDataLayout());
6954 
6955   // Get current frame pointer save index.  The users of this index will be
6956   // primarily DYNALLOC instructions.
6957   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6958   int FPSI = FI->getFramePointerSaveIndex();
6959 
6960   // If the frame pointer save index hasn't been defined yet.
6961   if (!FPSI) {
6962     // Find out what the fix offset of the frame pointer save area.
6963     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6964     // Allocate the frame index for frame pointer save area.
6965     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6966     // Save the result.
6967     FI->setFramePointerSaveIndex(FPSI);
6968   }
6969   return DAG.getFrameIndex(FPSI, PtrVT);
6970 }
6971 
6972 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6973                                                    SelectionDAG &DAG) const {
6974   // Get the inputs.
6975   SDValue Chain = Op.getOperand(0);
6976   SDValue Size  = Op.getOperand(1);
6977   SDLoc dl(Op);
6978 
6979   // Get the correct type for pointers.
6980   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6981   // Negate the size.
6982   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6983                                 DAG.getConstant(0, dl, PtrVT), Size);
6984   // Construct a node for the frame pointer save index.
6985   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6986   // Build a DYNALLOC node.
6987   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
6988   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
6989   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
6990 }
6991 
6992 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
6993                                                      SelectionDAG &DAG) const {
6994   MachineFunction &MF = DAG.getMachineFunction();
6995 
6996   bool isPPC64 = Subtarget.isPPC64();
6997   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6998 
6999   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7000   return DAG.getFrameIndex(FI, PtrVT);
7001 }
7002 
7003 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7004                                                SelectionDAG &DAG) const {
7005   SDLoc DL(Op);
7006   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7007                      DAG.getVTList(MVT::i32, MVT::Other),
7008                      Op.getOperand(0), Op.getOperand(1));
7009 }
7010 
7011 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7012                                                 SelectionDAG &DAG) const {
7013   SDLoc DL(Op);
7014   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7015                      Op.getOperand(0), Op.getOperand(1));
7016 }
7017 
7018 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7019   if (Op.getValueType().isVector())
7020     return LowerVectorLoad(Op, DAG);
7021 
7022   assert(Op.getValueType() == MVT::i1 &&
7023          "Custom lowering only for i1 loads");
7024 
7025   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7026 
7027   SDLoc dl(Op);
7028   LoadSDNode *LD = cast<LoadSDNode>(Op);
7029 
7030   SDValue Chain = LD->getChain();
7031   SDValue BasePtr = LD->getBasePtr();
7032   MachineMemOperand *MMO = LD->getMemOperand();
7033 
7034   SDValue NewLD =
7035       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7036                      BasePtr, MVT::i8, MMO);
7037   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7038 
7039   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7040   return DAG.getMergeValues(Ops, dl);
7041 }
7042 
7043 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7044   if (Op.getOperand(1).getValueType().isVector())
7045     return LowerVectorStore(Op, DAG);
7046 
7047   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7048          "Custom lowering only for i1 stores");
7049 
7050   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7051 
7052   SDLoc dl(Op);
7053   StoreSDNode *ST = cast<StoreSDNode>(Op);
7054 
7055   SDValue Chain = ST->getChain();
7056   SDValue BasePtr = ST->getBasePtr();
7057   SDValue Value = ST->getValue();
7058   MachineMemOperand *MMO = ST->getMemOperand();
7059 
7060   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7061                       Value);
7062   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7063 }
7064 
7065 // FIXME: Remove this once the ANDI glue bug is fixed:
7066 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7067   assert(Op.getValueType() == MVT::i1 &&
7068          "Custom lowering only for i1 results");
7069 
7070   SDLoc DL(Op);
7071   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
7072                      Op.getOperand(0));
7073 }
7074 
7075 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7076                                                SelectionDAG &DAG) const {
7077 
7078   // Implements a vector truncate that fits in a vector register as a shuffle.
7079   // We want to legalize vector truncates down to where the source fits in
7080   // a vector register (and target is therefore smaller than vector register
7081   // size).  At that point legalization will try to custom lower the sub-legal
7082   // result and get here - where we can contain the truncate as a single target
7083   // operation.
7084 
7085   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7086   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7087   //
7088   // We will implement it for big-endian ordering as this (where x denotes
7089   // undefined):
7090   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7091   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7092   //
7093   // The same operation in little-endian ordering will be:
7094   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7095   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7096 
7097   assert(Op.getValueType().isVector() && "Vector type expected.");
7098 
7099   SDLoc DL(Op);
7100   SDValue N1 = Op.getOperand(0);
7101   unsigned SrcSize = N1.getValueType().getSizeInBits();
7102   assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
7103   SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7104 
7105   EVT TrgVT = Op.getValueType();
7106   unsigned TrgNumElts = TrgVT.getVectorNumElements();
7107   EVT EltVT = TrgVT.getVectorElementType();
7108   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7109   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7110 
7111   // First list the elements we want to keep.
7112   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7113   SmallVector<int, 16> ShuffV;
7114   if (Subtarget.isLittleEndian())
7115     for (unsigned i = 0; i < TrgNumElts; ++i)
7116       ShuffV.push_back(i * SizeMult);
7117   else
7118     for (unsigned i = 1; i <= TrgNumElts; ++i)
7119       ShuffV.push_back(i * SizeMult - 1);
7120 
7121   // Populate the remaining elements with undefs.
7122   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7123     // ShuffV.push_back(i + WideNumElts);
7124     ShuffV.push_back(WideNumElts + 1);
7125 
7126   SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
7127   return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
7128 }
7129 
7130 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7131 /// possible.
7132 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7133   // Not FP? Not a fsel.
7134   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7135       !Op.getOperand(2).getValueType().isFloatingPoint())
7136     return Op;
7137 
7138   // We might be able to do better than this under some circumstances, but in
7139   // general, fsel-based lowering of select is a finite-math-only optimization.
7140   // For more information, see section F.3 of the 2.06 ISA specification.
7141   if (!DAG.getTarget().Options.NoInfsFPMath ||
7142       !DAG.getTarget().Options.NoNaNsFPMath)
7143     return Op;
7144   // TODO: Propagate flags from the select rather than global settings.
7145   SDNodeFlags Flags;
7146   Flags.setNoInfs(true);
7147   Flags.setNoNaNs(true);
7148 
7149   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7150 
7151   EVT ResVT = Op.getValueType();
7152   EVT CmpVT = Op.getOperand(0).getValueType();
7153   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7154   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
7155   SDLoc dl(Op);
7156 
7157   // If the RHS of the comparison is a 0.0, we don't need to do the
7158   // subtraction at all.
7159   SDValue Sel1;
7160   if (isFloatingPointZero(RHS))
7161     switch (CC) {
7162     default: break;       // SETUO etc aren't handled by fsel.
7163     case ISD::SETNE:
7164       std::swap(TV, FV);
7165       LLVM_FALLTHROUGH;
7166     case ISD::SETEQ:
7167       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
7168         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7169       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7170       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
7171         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7172       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7173                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7174     case ISD::SETULT:
7175     case ISD::SETLT:
7176       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
7177       LLVM_FALLTHROUGH;
7178     case ISD::SETOGE:
7179     case ISD::SETGE:
7180       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
7181         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7182       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7183     case ISD::SETUGT:
7184     case ISD::SETGT:
7185       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
7186       LLVM_FALLTHROUGH;
7187     case ISD::SETOLE:
7188     case ISD::SETLE:
7189       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
7190         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7191       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7192                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7193     }
7194 
7195   SDValue Cmp;
7196   switch (CC) {
7197   default: break;       // SETUO etc aren't handled by fsel.
7198   case ISD::SETNE:
7199     std::swap(TV, FV);
7200     LLVM_FALLTHROUGH;
7201   case ISD::SETEQ:
7202     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7203     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7204       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7205     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7206     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
7207       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7208     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7209                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7210   case ISD::SETULT:
7211   case ISD::SETLT:
7212     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7213     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7214       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7215     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7216   case ISD::SETOGE:
7217   case ISD::SETGE:
7218     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7219     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7220       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7221     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7222   case ISD::SETUGT:
7223   case ISD::SETGT:
7224     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7225     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7226       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7227     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7228   case ISD::SETOLE:
7229   case ISD::SETLE:
7230     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7231     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7232       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7233     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7234   }
7235   return Op;
7236 }
7237 
7238 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7239                                                SelectionDAG &DAG,
7240                                                const SDLoc &dl) const {
7241   assert(Op.getOperand(0).getValueType().isFloatingPoint());
7242   SDValue Src = Op.getOperand(0);
7243   if (Src.getValueType() == MVT::f32)
7244     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7245 
7246   SDValue Tmp;
7247   switch (Op.getSimpleValueType().SimpleTy) {
7248   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7249   case MVT::i32:
7250     Tmp = DAG.getNode(
7251         Op.getOpcode() == ISD::FP_TO_SINT
7252             ? PPCISD::FCTIWZ
7253             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7254         dl, MVT::f64, Src);
7255     break;
7256   case MVT::i64:
7257     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7258            "i64 FP_TO_UINT is supported only with FPCVT");
7259     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7260                                                         PPCISD::FCTIDUZ,
7261                       dl, MVT::f64, Src);
7262     break;
7263   }
7264 
7265   // Convert the FP value to an int value through memory.
7266   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7267     (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
7268   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
7269   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7270   MachinePointerInfo MPI =
7271       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
7272 
7273   // Emit a store to the stack slot.
7274   SDValue Chain;
7275   if (i32Stack) {
7276     MachineFunction &MF = DAG.getMachineFunction();
7277     MachineMemOperand *MMO =
7278       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
7279     SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
7280     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7281               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7282   } else
7283     Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
7284 
7285   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
7286   // add in a bias on big endian.
7287   if (Op.getValueType() == MVT::i32 && !i32Stack) {
7288     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7289                         DAG.getConstant(4, dl, FIPtr.getValueType()));
7290     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7291   }
7292 
7293   RLI.Chain = Chain;
7294   RLI.Ptr = FIPtr;
7295   RLI.MPI = MPI;
7296 }
7297 
7298 /// Custom lowers floating point to integer conversions to use
7299 /// the direct move instructions available in ISA 2.07 to avoid the
7300 /// need for load/store combinations.
7301 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7302                                                     SelectionDAG &DAG,
7303                                                     const SDLoc &dl) const {
7304   assert(Op.getOperand(0).getValueType().isFloatingPoint());
7305   SDValue Src = Op.getOperand(0);
7306 
7307   if (Src.getValueType() == MVT::f32)
7308     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7309 
7310   SDValue Tmp;
7311   switch (Op.getSimpleValueType().SimpleTy) {
7312   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7313   case MVT::i32:
7314     Tmp = DAG.getNode(
7315         Op.getOpcode() == ISD::FP_TO_SINT
7316             ? PPCISD::FCTIWZ
7317             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7318         dl, MVT::f64, Src);
7319     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
7320     break;
7321   case MVT::i64:
7322     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7323            "i64 FP_TO_UINT is supported only with FPCVT");
7324     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7325                                                         PPCISD::FCTIDUZ,
7326                       dl, MVT::f64, Src);
7327     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
7328     break;
7329   }
7330   return Tmp;
7331 }
7332 
7333 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7334                                           const SDLoc &dl) const {
7335 
7336   // FP to INT conversions are legal for f128.
7337   if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
7338     return Op;
7339 
7340   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7341   // PPC (the libcall is not available).
7342   if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
7343     if (Op.getValueType() == MVT::i32) {
7344       if (Op.getOpcode() == ISD::FP_TO_SINT) {
7345         SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7346                                  MVT::f64, Op.getOperand(0),
7347                                  DAG.getIntPtrConstant(0, dl));
7348         SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7349                                  MVT::f64, Op.getOperand(0),
7350                                  DAG.getIntPtrConstant(1, dl));
7351 
7352         // Add the two halves of the long double in round-to-zero mode.
7353         SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7354 
7355         // Now use a smaller FP_TO_SINT.
7356         return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7357       }
7358       if (Op.getOpcode() == ISD::FP_TO_UINT) {
7359         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7360         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
7361         SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
7362         //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7363         // FIXME: generated code sucks.
7364         // TODO: Are there fast-math-flags to propagate to this FSUB?
7365         SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
7366                                    Op.getOperand(0), Tmp);
7367         True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7368         True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
7369                            DAG.getConstant(0x80000000, dl, MVT::i32));
7370         SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
7371                                     Op.getOperand(0));
7372         return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
7373                                ISD::SETGE);
7374       }
7375     }
7376 
7377     return SDValue();
7378   }
7379 
7380   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7381     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7382 
7383   ReuseLoadInfo RLI;
7384   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7385 
7386   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7387                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7388 }
7389 
7390 // We're trying to insert a regular store, S, and then a load, L. If the
7391 // incoming value, O, is a load, we might just be able to have our load use the
7392 // address used by O. However, we don't know if anything else will store to
7393 // that address before we can load from it. To prevent this situation, we need
7394 // to insert our load, L, into the chain as a peer of O. To do this, we give L
7395 // the same chain operand as O, we create a token factor from the chain results
7396 // of O and L, and we replace all uses of O's chain result with that token
7397 // factor (see spliceIntoChain below for this last part).
7398 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7399                                             ReuseLoadInfo &RLI,
7400                                             SelectionDAG &DAG,
7401                                             ISD::LoadExtType ET) const {
7402   SDLoc dl(Op);
7403   if (ET == ISD::NON_EXTLOAD &&
7404       (Op.getOpcode() == ISD::FP_TO_UINT ||
7405        Op.getOpcode() == ISD::FP_TO_SINT) &&
7406       isOperationLegalOrCustom(Op.getOpcode(),
7407                                Op.getOperand(0).getValueType())) {
7408 
7409     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7410     return true;
7411   }
7412 
7413   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
7414   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7415       LD->isNonTemporal())
7416     return false;
7417   if (LD->getMemoryVT() != MemVT)
7418     return false;
7419 
7420   RLI.Ptr = LD->getBasePtr();
7421   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7422     assert(LD->getAddressingMode() == ISD::PRE_INC &&
7423            "Non-pre-inc AM on PPC?");
7424     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7425                           LD->getOffset());
7426   }
7427 
7428   RLI.Chain = LD->getChain();
7429   RLI.MPI = LD->getPointerInfo();
7430   RLI.IsDereferenceable = LD->isDereferenceable();
7431   RLI.IsInvariant = LD->isInvariant();
7432   RLI.Alignment = LD->getAlignment();
7433   RLI.AAInfo = LD->getAAInfo();
7434   RLI.Ranges = LD->getRanges();
7435 
7436   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7437   return true;
7438 }
7439 
7440 // Given the head of the old chain, ResChain, insert a token factor containing
7441 // it and NewResChain, and make users of ResChain now be users of that token
7442 // factor.
7443 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7444 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7445                                         SDValue NewResChain,
7446                                         SelectionDAG &DAG) const {
7447   if (!ResChain)
7448     return;
7449 
7450   SDLoc dl(NewResChain);
7451 
7452   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
7453                            NewResChain, DAG.getUNDEF(MVT::Other));
7454   assert(TF.getNode() != NewResChain.getNode() &&
7455          "A new TF really is required here");
7456 
7457   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7458   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7459 }
7460 
7461 /// Analyze profitability of direct move
7462 /// prefer float load to int load plus direct move
7463 /// when there is no integer use of int load
7464 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7465   SDNode *Origin = Op.getOperand(0).getNode();
7466   if (Origin->getOpcode() != ISD::LOAD)
7467     return true;
7468 
7469   // If there is no LXSIBZX/LXSIHZX, like Power8,
7470   // prefer direct move if the memory size is 1 or 2 bytes.
7471   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7472   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7473     return true;
7474 
7475   for (SDNode::use_iterator UI = Origin->use_begin(),
7476                             UE = Origin->use_end();
7477        UI != UE; ++UI) {
7478 
7479     // Only look at the users of the loaded value.
7480     if (UI.getUse().get().getResNo() != 0)
7481       continue;
7482 
7483     if (UI->getOpcode() != ISD::SINT_TO_FP &&
7484         UI->getOpcode() != ISD::UINT_TO_FP)
7485       return true;
7486   }
7487 
7488   return false;
7489 }
7490 
7491 /// Custom lowers integer to floating point conversions to use
7492 /// the direct move instructions available in ISA 2.07 to avoid the
7493 /// need for load/store combinations.
7494 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7495                                                     SelectionDAG &DAG,
7496                                                     const SDLoc &dl) const {
7497   assert((Op.getValueType() == MVT::f32 ||
7498           Op.getValueType() == MVT::f64) &&
7499          "Invalid floating point type as target of conversion");
7500   assert(Subtarget.hasFPCVT() &&
7501          "Int to FP conversions with direct moves require FPCVT");
7502   SDValue FP;
7503   SDValue Src = Op.getOperand(0);
7504   bool SinglePrec = Op.getValueType() == MVT::f32;
7505   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
7506   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
7507   unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
7508                              (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
7509 
7510   if (WordInt) {
7511     FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
7512                      dl, MVT::f64, Src);
7513     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7514   }
7515   else {
7516     FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
7517     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7518   }
7519 
7520   return FP;
7521 }
7522 
7523 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7524 
7525   EVT VecVT = Vec.getValueType();
7526   assert(VecVT.isVector() && "Expected a vector type.");
7527   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7528 
7529   EVT EltVT = VecVT.getVectorElementType();
7530   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7531   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7532 
7533   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7534   SmallVector<SDValue, 16> Ops(NumConcat);
7535   Ops[0] = Vec;
7536   SDValue UndefVec = DAG.getUNDEF(VecVT);
7537   for (unsigned i = 1; i < NumConcat; ++i)
7538     Ops[i] = UndefVec;
7539 
7540   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7541 }
7542 
7543 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7544                                                 const SDLoc &dl) const {
7545 
7546   unsigned Opc = Op.getOpcode();
7547   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
7548          "Unexpected conversion type");
7549   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7550          "Supports conversions to v2f64/v4f32 only.");
7551 
7552   bool SignedConv = Opc == ISD::SINT_TO_FP;
7553   bool FourEltRes = Op.getValueType() == MVT::v4f32;
7554 
7555   SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
7556   EVT WideVT = Wide.getValueType();
7557   unsigned WideNumElts = WideVT.getVectorNumElements();
7558   MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
7559 
7560   SmallVector<int, 16> ShuffV;
7561   for (unsigned i = 0; i < WideNumElts; ++i)
7562     ShuffV.push_back(i + WideNumElts);
7563 
7564   int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7565   int SaveElts = FourEltRes ? 4 : 2;
7566   if (Subtarget.isLittleEndian())
7567     for (int i = 0; i < SaveElts; i++)
7568       ShuffV[i * Stride] = i;
7569   else
7570     for (int i = 1; i <= SaveElts; i++)
7571       ShuffV[i * Stride - 1] = i - 1;
7572 
7573   SDValue ShuffleSrc2 =
7574       SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7575   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
7576   unsigned ExtendOp =
7577       SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
7578 
7579   SDValue Extend;
7580   if (!Subtarget.hasP9Altivec() && SignedConv) {
7581     Arrange = DAG.getBitcast(IntermediateVT, Arrange);
7582     Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
7583                          DAG.getValueType(Op.getOperand(0).getValueType()));
7584   } else
7585     Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
7586 
7587   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
7588 }
7589 
7590 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
7591                                           SelectionDAG &DAG) const {
7592   SDLoc dl(Op);
7593 
7594   EVT InVT = Op.getOperand(0).getValueType();
7595   EVT OutVT = Op.getValueType();
7596   if (OutVT.isVector() && OutVT.isFloatingPoint() &&
7597       isOperationCustom(Op.getOpcode(), InVT))
7598     return LowerINT_TO_FPVector(Op, DAG, dl);
7599 
7600   // Conversions to f128 are legal.
7601   if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
7602     return Op;
7603 
7604   if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
7605     if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
7606       return SDValue();
7607 
7608     SDValue Value = Op.getOperand(0);
7609     // The values are now known to be -1 (false) or 1 (true). To convert this
7610     // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7611     // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7612     Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7613 
7614     SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
7615 
7616     Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7617 
7618     if (Op.getValueType() != MVT::v4f64)
7619       Value = DAG.getNode(ISD::FP_ROUND, dl,
7620                           Op.getValueType(), Value,
7621                           DAG.getIntPtrConstant(1, dl));
7622     return Value;
7623   }
7624 
7625   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
7626   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
7627     return SDValue();
7628 
7629   if (Op.getOperand(0).getValueType() == MVT::i1)
7630     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
7631                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
7632                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
7633 
7634   // If we have direct moves, we can do all the conversion, skip the store/load
7635   // however, without FPCVT we can't do most conversions.
7636   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
7637       Subtarget.isPPC64() && Subtarget.hasFPCVT())
7638     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
7639 
7640   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
7641          "UINT_TO_FP is supported only with FPCVT");
7642 
7643   // If we have FCFIDS, then use it when converting to single-precision.
7644   // Otherwise, convert to double-precision and then round.
7645   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7646                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7647                                                             : PPCISD::FCFIDS)
7648                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7649                                                             : PPCISD::FCFID);
7650   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7651                   ? MVT::f32
7652                   : MVT::f64;
7653 
7654   if (Op.getOperand(0).getValueType() == MVT::i64) {
7655     SDValue SINT = Op.getOperand(0);
7656     // When converting to single-precision, we actually need to convert
7657     // to double-precision first and then round to single-precision.
7658     // To avoid double-rounding effects during that operation, we have
7659     // to prepare the input operand.  Bits that might be truncated when
7660     // converting to double-precision are replaced by a bit that won't
7661     // be lost at this stage, but is below the single-precision rounding
7662     // position.
7663     //
7664     // However, if -enable-unsafe-fp-math is in effect, accept double
7665     // rounding to avoid the extra overhead.
7666     if (Op.getValueType() == MVT::f32 &&
7667         !Subtarget.hasFPCVT() &&
7668         !DAG.getTarget().Options.UnsafeFPMath) {
7669 
7670       // Twiddle input to make sure the low 11 bits are zero.  (If this
7671       // is the case, we are guaranteed the value will fit into the 53 bit
7672       // mantissa of an IEEE double-precision value without rounding.)
7673       // If any of those low 11 bits were not zero originally, make sure
7674       // bit 12 (value 2048) is set instead, so that the final rounding
7675       // to single-precision gets the correct result.
7676       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7677                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
7678       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
7679                           Round, DAG.getConstant(2047, dl, MVT::i64));
7680       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
7681       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7682                           Round, DAG.getConstant(-2048, dl, MVT::i64));
7683 
7684       // However, we cannot use that value unconditionally: if the magnitude
7685       // of the input value is small, the bit-twiddling we did above might
7686       // end up visibly changing the output.  Fortunately, in that case, we
7687       // don't need to twiddle bits since the original input will convert
7688       // exactly to double-precision floating-point already.  Therefore,
7689       // construct a conditional to use the original value if the top 11
7690       // bits are all sign-bit copies, and use the rounded value computed
7691       // above otherwise.
7692       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
7693                                  SINT, DAG.getConstant(53, dl, MVT::i32));
7694       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
7695                          Cond, DAG.getConstant(1, dl, MVT::i64));
7696       Cond = DAG.getSetCC(dl, MVT::i32,
7697                           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
7698 
7699       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
7700     }
7701 
7702     ReuseLoadInfo RLI;
7703     SDValue Bits;
7704 
7705     MachineFunction &MF = DAG.getMachineFunction();
7706     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7707       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7708                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7709       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7710     } else if (Subtarget.hasLFIWAX() &&
7711                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7712       MachineMemOperand *MMO =
7713         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7714                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7715       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7716       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
7717                                      DAG.getVTList(MVT::f64, MVT::Other),
7718                                      Ops, MVT::i32, MMO);
7719       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7720     } else if (Subtarget.hasFPCVT() &&
7721                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7722       MachineMemOperand *MMO =
7723         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7724                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7725       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7726       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
7727                                      DAG.getVTList(MVT::f64, MVT::Other),
7728                                      Ops, MVT::i32, MMO);
7729       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7730     } else if (((Subtarget.hasLFIWAX() &&
7731                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7732                 (Subtarget.hasFPCVT() &&
7733                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7734                SINT.getOperand(0).getValueType() == MVT::i32) {
7735       MachineFrameInfo &MFI = MF.getFrameInfo();
7736       EVT PtrVT = getPointerTy(DAG.getDataLayout());
7737 
7738       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7739       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7740 
7741       SDValue Store =
7742           DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
7743                        MachinePointerInfo::getFixedStack(
7744                            DAG.getMachineFunction(), FrameIdx));
7745 
7746       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7747              "Expected an i32 store");
7748 
7749       RLI.Ptr = FIdx;
7750       RLI.Chain = Store;
7751       RLI.MPI =
7752           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7753       RLI.Alignment = 4;
7754 
7755       MachineMemOperand *MMO =
7756         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7757                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7758       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7759       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7760                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
7761                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
7762                                      Ops, MVT::i32, MMO);
7763     } else
7764       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
7765 
7766     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
7767 
7768     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7769       FP = DAG.getNode(ISD::FP_ROUND, dl,
7770                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
7771     return FP;
7772   }
7773 
7774   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
7775          "Unhandled INT_TO_FP type in custom expander!");
7776   // Since we only generate this in 64-bit mode, we can take advantage of
7777   // 64-bit registers.  In particular, sign extend the input value into the
7778   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
7779   // then lfd it and fcfid it.
7780   MachineFunction &MF = DAG.getMachineFunction();
7781   MachineFrameInfo &MFI = MF.getFrameInfo();
7782   EVT PtrVT = getPointerTy(MF.getDataLayout());
7783 
7784   SDValue Ld;
7785   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7786     ReuseLoadInfo RLI;
7787     bool ReusingLoad;
7788     if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
7789                                             DAG))) {
7790       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7791       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7792 
7793       SDValue Store =
7794           DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7795                        MachinePointerInfo::getFixedStack(
7796                            DAG.getMachineFunction(), FrameIdx));
7797 
7798       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7799              "Expected an i32 store");
7800 
7801       RLI.Ptr = FIdx;
7802       RLI.Chain = Store;
7803       RLI.MPI =
7804           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7805       RLI.Alignment = 4;
7806     }
7807 
7808     MachineMemOperand *MMO =
7809       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7810                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7811     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7812     Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7813                                    PPCISD::LFIWZX : PPCISD::LFIWAX,
7814                                  dl, DAG.getVTList(MVT::f64, MVT::Other),
7815                                  Ops, MVT::i32, MMO);
7816     if (ReusingLoad)
7817       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
7818   } else {
7819     assert(Subtarget.isPPC64() &&
7820            "i32->FP without LFIWAX supported only on PPC64");
7821 
7822     int FrameIdx = MFI.CreateStackObject(8, 8, false);
7823     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7824 
7825     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
7826                                 Op.getOperand(0));
7827 
7828     // STD the extended value into the stack slot.
7829     SDValue Store = DAG.getStore(
7830         DAG.getEntryNode(), dl, Ext64, FIdx,
7831         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7832 
7833     // Load the value as a double.
7834     Ld = DAG.getLoad(
7835         MVT::f64, dl, Store, FIdx,
7836         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7837   }
7838 
7839   // FCFID it and return it.
7840   SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
7841   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7842     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
7843                      DAG.getIntPtrConstant(0, dl));
7844   return FP;
7845 }
7846 
7847 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
7848                                             SelectionDAG &DAG) const {
7849   SDLoc dl(Op);
7850   /*
7851    The rounding mode is in bits 30:31 of FPSR, and has the following
7852    settings:
7853      00 Round to nearest
7854      01 Round to 0
7855      10 Round to +inf
7856      11 Round to -inf
7857 
7858   FLT_ROUNDS, on the other hand, expects the following:
7859     -1 Undefined
7860      0 Round to 0
7861      1 Round to nearest
7862      2 Round to +inf
7863      3 Round to -inf
7864 
7865   To perform the conversion, we do:
7866     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
7867   */
7868 
7869   MachineFunction &MF = DAG.getMachineFunction();
7870   EVT VT = Op.getValueType();
7871   EVT PtrVT = getPointerTy(MF.getDataLayout());
7872 
7873   // Save FP Control Word to register
7874   EVT NodeTys[] = {
7875     MVT::f64,    // return register
7876     MVT::Glue    // unused in this context
7877   };
7878   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
7879 
7880   // Save FP register to stack slot
7881   int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
7882   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
7883   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
7884                                MachinePointerInfo());
7885 
7886   // Load FP Control Word from low 32 bits of stack slot.
7887   SDValue Four = DAG.getConstant(4, dl, PtrVT);
7888   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
7889   SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
7890 
7891   // Transform as necessary
7892   SDValue CWD1 =
7893     DAG.getNode(ISD::AND, dl, MVT::i32,
7894                 CWD, DAG.getConstant(3, dl, MVT::i32));
7895   SDValue CWD2 =
7896     DAG.getNode(ISD::SRL, dl, MVT::i32,
7897                 DAG.getNode(ISD::AND, dl, MVT::i32,
7898                             DAG.getNode(ISD::XOR, dl, MVT::i32,
7899                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
7900                             DAG.getConstant(3, dl, MVT::i32)),
7901                 DAG.getConstant(1, dl, MVT::i32));
7902 
7903   SDValue RetVal =
7904     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
7905 
7906   return DAG.getNode((VT.getSizeInBits() < 16 ?
7907                       ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
7908 }
7909 
7910 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7911   EVT VT = Op.getValueType();
7912   unsigned BitWidth = VT.getSizeInBits();
7913   SDLoc dl(Op);
7914   assert(Op.getNumOperands() == 3 &&
7915          VT == Op.getOperand(1).getValueType() &&
7916          "Unexpected SHL!");
7917 
7918   // Expand into a bunch of logical ops.  Note that these ops
7919   // depend on the PPC behavior for oversized shift amounts.
7920   SDValue Lo = Op.getOperand(0);
7921   SDValue Hi = Op.getOperand(1);
7922   SDValue Amt = Op.getOperand(2);
7923   EVT AmtVT = Amt.getValueType();
7924 
7925   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7926                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7927   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
7928   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
7929   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
7930   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7931                              DAG.getConstant(-BitWidth, dl, AmtVT));
7932   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7933   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7934   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7935   SDValue OutOps[] = { OutLo, OutHi };
7936   return DAG.getMergeValues(OutOps, dl);
7937 }
7938 
7939 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7940   EVT VT = Op.getValueType();
7941   SDLoc dl(Op);
7942   unsigned BitWidth = VT.getSizeInBits();
7943   assert(Op.getNumOperands() == 3 &&
7944          VT == Op.getOperand(1).getValueType() &&
7945          "Unexpected SRL!");
7946 
7947   // Expand into a bunch of logical ops.  Note that these ops
7948   // depend on the PPC behavior for oversized shift amounts.
7949   SDValue Lo = Op.getOperand(0);
7950   SDValue Hi = Op.getOperand(1);
7951   SDValue Amt = Op.getOperand(2);
7952   EVT AmtVT = Amt.getValueType();
7953 
7954   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7955                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7956   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7957   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7958   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7959   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7960                              DAG.getConstant(-BitWidth, dl, AmtVT));
7961   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
7962   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7963   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
7964   SDValue OutOps[] = { OutLo, OutHi };
7965   return DAG.getMergeValues(OutOps, dl);
7966 }
7967 
7968 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
7969   SDLoc dl(Op);
7970   EVT VT = Op.getValueType();
7971   unsigned BitWidth = VT.getSizeInBits();
7972   assert(Op.getNumOperands() == 3 &&
7973          VT == Op.getOperand(1).getValueType() &&
7974          "Unexpected SRA!");
7975 
7976   // Expand into a bunch of logical ops, followed by a select_cc.
7977   SDValue Lo = Op.getOperand(0);
7978   SDValue Hi = Op.getOperand(1);
7979   SDValue Amt = Op.getOperand(2);
7980   EVT AmtVT = Amt.getValueType();
7981 
7982   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7983                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7984   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7985   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7986   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7987   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7988                              DAG.getConstant(-BitWidth, dl, AmtVT));
7989   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
7990   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
7991   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
7992                                   Tmp4, Tmp6, ISD::SETLE);
7993   SDValue OutOps[] = { OutLo, OutHi };
7994   return DAG.getMergeValues(OutOps, dl);
7995 }
7996 
7997 //===----------------------------------------------------------------------===//
7998 // Vector related lowering.
7999 //
8000 
8001 /// BuildSplatI - Build a canonical splati of Val with an element size of
8002 /// SplatSize.  Cast the result to VT.
8003 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
8004                            SelectionDAG &DAG, const SDLoc &dl) {
8005   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
8006 
8007   static const MVT VTys[] = { // canonical VT to use for each size.
8008     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8009   };
8010 
8011   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8012 
8013   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
8014   if (Val == -1)
8015     SplatSize = 1;
8016 
8017   EVT CanonicalVT = VTys[SplatSize-1];
8018 
8019   // Build a canonical splat for this value.
8020   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8021 }
8022 
8023 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8024 /// specified intrinsic ID.
8025 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8026                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
8027   if (DestVT == MVT::Other) DestVT = Op.getValueType();
8028   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8029                      DAG.getConstant(IID, dl, MVT::i32), Op);
8030 }
8031 
8032 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8033 /// specified intrinsic ID.
8034 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8035                                 SelectionDAG &DAG, const SDLoc &dl,
8036                                 EVT DestVT = MVT::Other) {
8037   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8038   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8039                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8040 }
8041 
8042 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8043 /// specified intrinsic ID.
8044 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8045                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8046                                 EVT DestVT = MVT::Other) {
8047   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8048   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8049                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8050 }
8051 
8052 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8053 /// amount.  The result has the specified value type.
8054 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8055                            SelectionDAG &DAG, const SDLoc &dl) {
8056   // Force LHS/RHS to be the right type.
8057   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8058   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8059 
8060   int Ops[16];
8061   for (unsigned i = 0; i != 16; ++i)
8062     Ops[i] = i + Amt;
8063   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8064   return DAG.getNode(ISD::BITCAST, dl, VT, T);
8065 }
8066 
8067 /// Do we have an efficient pattern in a .td file for this node?
8068 ///
8069 /// \param V - pointer to the BuildVectorSDNode being matched
8070 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8071 ///
8072 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8073 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8074 /// the opposite is true (expansion is beneficial) are:
8075 /// - The node builds a vector out of integers that are not 32 or 64-bits
8076 /// - The node builds a vector out of constants
8077 /// - The node is a "load-and-splat"
8078 /// In all other cases, we will choose to keep the BUILD_VECTOR.
8079 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8080                                             bool HasDirectMove,
8081                                             bool HasP8Vector) {
8082   EVT VecVT = V->getValueType(0);
8083   bool RightType = VecVT == MVT::v2f64 ||
8084     (HasP8Vector && VecVT == MVT::v4f32) ||
8085     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8086   if (!RightType)
8087     return false;
8088 
8089   bool IsSplat = true;
8090   bool IsLoad = false;
8091   SDValue Op0 = V->getOperand(0);
8092 
8093   // This function is called in a block that confirms the node is not a constant
8094   // splat. So a constant BUILD_VECTOR here means the vector is built out of
8095   // different constants.
8096   if (V->isConstant())
8097     return false;
8098   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8099     if (V->getOperand(i).isUndef())
8100       return false;
8101     // We want to expand nodes that represent load-and-splat even if the
8102     // loaded value is a floating point truncation or conversion to int.
8103     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8104         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8105          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8106         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8107          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8108         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8109          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8110       IsLoad = true;
8111     // If the operands are different or the input is not a load and has more
8112     // uses than just this BV node, then it isn't a splat.
8113     if (V->getOperand(i) != Op0 ||
8114         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8115       IsSplat = false;
8116   }
8117   return !(IsSplat && IsLoad);
8118 }
8119 
8120 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8121 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8122 
8123   SDLoc dl(Op);
8124   SDValue Op0 = Op->getOperand(0);
8125 
8126   if (!EnableQuadPrecision ||
8127       (Op.getValueType() != MVT::f128 ) ||
8128       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8129       (Op0.getOperand(0).getValueType() !=  MVT::i64) ||
8130       (Op0.getOperand(1).getValueType() != MVT::i64))
8131     return SDValue();
8132 
8133   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8134                      Op0.getOperand(1));
8135 }
8136 
8137 // If this is a case we can't handle, return null and let the default
8138 // expansion code take care of it.  If we CAN select this case, and if it
8139 // selects to a single instruction, return Op.  Otherwise, if we can codegen
8140 // this case more efficiently than a constant pool load, lower it to the
8141 // sequence of ops that should be used.
8142 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8143                                              SelectionDAG &DAG) const {
8144   SDLoc dl(Op);
8145   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8146   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8147 
8148   if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
8149     // We first build an i32 vector, load it into a QPX register,
8150     // then convert it to a floating-point vector and compare it
8151     // to a zero vector to get the boolean result.
8152     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8153     int FrameIdx = MFI.CreateStackObject(16, 16, false);
8154     MachinePointerInfo PtrInfo =
8155         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8156     EVT PtrVT = getPointerTy(DAG.getDataLayout());
8157     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8158 
8159     assert(BVN->getNumOperands() == 4 &&
8160       "BUILD_VECTOR for v4i1 does not have 4 operands");
8161 
8162     bool IsConst = true;
8163     for (unsigned i = 0; i < 4; ++i) {
8164       if (BVN->getOperand(i).isUndef()) continue;
8165       if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
8166         IsConst = false;
8167         break;
8168       }
8169     }
8170 
8171     if (IsConst) {
8172       Constant *One =
8173         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
8174       Constant *NegOne =
8175         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
8176 
8177       Constant *CV[4];
8178       for (unsigned i = 0; i < 4; ++i) {
8179         if (BVN->getOperand(i).isUndef())
8180           CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
8181         else if (isNullConstant(BVN->getOperand(i)))
8182           CV[i] = NegOne;
8183         else
8184           CV[i] = One;
8185       }
8186 
8187       Constant *CP = ConstantVector::get(CV);
8188       SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
8189                                           16 /* alignment */);
8190 
8191       SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
8192       SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
8193       return DAG.getMemIntrinsicNode(
8194           PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
8195           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8196     }
8197 
8198     SmallVector<SDValue, 4> Stores;
8199     for (unsigned i = 0; i < 4; ++i) {
8200       if (BVN->getOperand(i).isUndef()) continue;
8201 
8202       unsigned Offset = 4*i;
8203       SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8204       Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8205 
8206       unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
8207       if (StoreSize > 4) {
8208         Stores.push_back(
8209             DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
8210                               PtrInfo.getWithOffset(Offset), MVT::i32));
8211       } else {
8212         SDValue StoreValue = BVN->getOperand(i);
8213         if (StoreSize < 4)
8214           StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
8215 
8216         Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
8217                                       PtrInfo.getWithOffset(Offset)));
8218       }
8219     }
8220 
8221     SDValue StoreChain;
8222     if (!Stores.empty())
8223       StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8224     else
8225       StoreChain = DAG.getEntryNode();
8226 
8227     // Now load from v4i32 into the QPX register; this will extend it to
8228     // v4i64 but not yet convert it to a floating point. Nevertheless, this
8229     // is typed as v4f64 because the QPX register integer states are not
8230     // explicitly represented.
8231 
8232     SDValue Ops[] = {StoreChain,
8233                      DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
8234                      FIdx};
8235     SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
8236 
8237     SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
8238       dl, VTs, Ops, MVT::v4i32, PtrInfo);
8239     LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8240       DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
8241       LoadedVect);
8242 
8243     SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
8244 
8245     return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
8246   }
8247 
8248   // All other QPX vectors are handled by generic code.
8249   if (Subtarget.hasQPX())
8250     return SDValue();
8251 
8252   // Check if this is a splat of a constant value.
8253   APInt APSplatBits, APSplatUndef;
8254   unsigned SplatBitSize;
8255   bool HasAnyUndefs;
8256   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8257                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
8258       SplatBitSize > 32) {
8259     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
8260     // lowered to VSX instructions under certain conditions.
8261     // Without VSX, there is no pattern more efficient than expanding the node.
8262     if (Subtarget.hasVSX() &&
8263         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8264                                         Subtarget.hasP8Vector()))
8265       return Op;
8266     return SDValue();
8267   }
8268 
8269   unsigned SplatBits = APSplatBits.getZExtValue();
8270   unsigned SplatUndef = APSplatUndef.getZExtValue();
8271   unsigned SplatSize = SplatBitSize / 8;
8272 
8273   // First, handle single instruction cases.
8274 
8275   // All zeros?
8276   if (SplatBits == 0) {
8277     // Canonicalize all zero vectors to be v4i32.
8278     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8279       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8280       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8281     }
8282     return Op;
8283   }
8284 
8285   // We have XXSPLTIB for constant splats one byte wide
8286   if (Subtarget.hasP9Vector() && SplatSize == 1) {
8287     // This is a splat of 1-byte elements with some elements potentially undef.
8288     // Rather than trying to match undef in the SDAG patterns, ensure that all
8289     // elements are the same constant.
8290     if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
8291       SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
8292                                                        dl, MVT::i32));
8293       SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
8294       if (Op.getValueType() != MVT::v16i8)
8295         return DAG.getBitcast(Op.getValueType(), NewBV);
8296       return NewBV;
8297     }
8298 
8299     // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
8300     // detect that constant splats like v8i16: 0xABAB are really just splats
8301     // of a 1-byte constant. In this case, we need to convert the node to a
8302     // splat of v16i8 and a bitcast.
8303     if (Op.getValueType() != MVT::v16i8)
8304       return DAG.getBitcast(Op.getValueType(),
8305                             DAG.getConstant(SplatBits, dl, MVT::v16i8));
8306 
8307     return Op;
8308   }
8309 
8310   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8311   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
8312                     (32-SplatBitSize));
8313   if (SextVal >= -16 && SextVal <= 15)
8314     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
8315 
8316   // Two instruction sequences.
8317 
8318   // If this value is in the range [-32,30] and is even, use:
8319   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8320   // If this value is in the range [17,31] and is odd, use:
8321   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8322   // If this value is in the range [-31,-17] and is odd, use:
8323   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8324   // Note the last two are three-instruction sequences.
8325   if (SextVal >= -32 && SextVal <= 31) {
8326     // To avoid having these optimizations undone by constant folding,
8327     // we convert to a pseudo that will be expanded later into one of
8328     // the above forms.
8329     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8330     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8331               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8332     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8333     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8334     if (VT == Op.getValueType())
8335       return RetVal;
8336     else
8337       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8338   }
8339 
8340   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
8341   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
8342   // for fneg/fabs.
8343   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8344     // Make -1 and vspltisw -1:
8345     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
8346 
8347     // Make the VSLW intrinsic, computing 0x8000_0000.
8348     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8349                                    OnesV, DAG, dl);
8350 
8351     // xor by OnesV to invert it.
8352     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8353     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8354   }
8355 
8356   // Check to see if this is a wide variety of vsplti*, binop self cases.
8357   static const signed char SplatCsts[] = {
8358     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8359     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8360   };
8361 
8362   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8363     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8364     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
8365     int i = SplatCsts[idx];
8366 
8367     // Figure out what shift amount will be used by altivec if shifted by i in
8368     // this splat size.
8369     unsigned TypeShiftAmt = i & (SplatBitSize-1);
8370 
8371     // vsplti + shl self.
8372     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8373       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8374       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8375         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8376         Intrinsic::ppc_altivec_vslw
8377       };
8378       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8379       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8380     }
8381 
8382     // vsplti + srl self.
8383     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8384       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8385       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8386         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8387         Intrinsic::ppc_altivec_vsrw
8388       };
8389       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8390       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8391     }
8392 
8393     // vsplti + sra self.
8394     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8395       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8396       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8397         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
8398         Intrinsic::ppc_altivec_vsraw
8399       };
8400       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8401       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8402     }
8403 
8404     // vsplti + rol self.
8405     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8406                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8407       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8408       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8409         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8410         Intrinsic::ppc_altivec_vrlw
8411       };
8412       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8413       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8414     }
8415 
8416     // t = vsplti c, result = vsldoi t, t, 1
8417     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8418       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8419       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8420       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8421     }
8422     // t = vsplti c, result = vsldoi t, t, 2
8423     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8424       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8425       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8426       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8427     }
8428     // t = vsplti c, result = vsldoi t, t, 3
8429     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8430       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8431       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8432       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8433     }
8434   }
8435 
8436   return SDValue();
8437 }
8438 
8439 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8440 /// the specified operations to build the shuffle.
8441 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8442                                       SDValue RHS, SelectionDAG &DAG,
8443                                       const SDLoc &dl) {
8444   unsigned OpNum = (PFEntry >> 26) & 0x0F;
8445   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8446   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
8447 
8448   enum {
8449     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8450     OP_VMRGHW,
8451     OP_VMRGLW,
8452     OP_VSPLTISW0,
8453     OP_VSPLTISW1,
8454     OP_VSPLTISW2,
8455     OP_VSPLTISW3,
8456     OP_VSLDOI4,
8457     OP_VSLDOI8,
8458     OP_VSLDOI12
8459   };
8460 
8461   if (OpNum == OP_COPY) {
8462     if (LHSID == (1*9+2)*9+3) return LHS;
8463     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8464     return RHS;
8465   }
8466 
8467   SDValue OpLHS, OpRHS;
8468   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8469   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8470 
8471   int ShufIdxs[16];
8472   switch (OpNum) {
8473   default: llvm_unreachable("Unknown i32 permute!");
8474   case OP_VMRGHW:
8475     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
8476     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8477     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
8478     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8479     break;
8480   case OP_VMRGLW:
8481     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8482     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8483     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8484     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8485     break;
8486   case OP_VSPLTISW0:
8487     for (unsigned i = 0; i != 16; ++i)
8488       ShufIdxs[i] = (i&3)+0;
8489     break;
8490   case OP_VSPLTISW1:
8491     for (unsigned i = 0; i != 16; ++i)
8492       ShufIdxs[i] = (i&3)+4;
8493     break;
8494   case OP_VSPLTISW2:
8495     for (unsigned i = 0; i != 16; ++i)
8496       ShufIdxs[i] = (i&3)+8;
8497     break;
8498   case OP_VSPLTISW3:
8499     for (unsigned i = 0; i != 16; ++i)
8500       ShufIdxs[i] = (i&3)+12;
8501     break;
8502   case OP_VSLDOI4:
8503     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8504   case OP_VSLDOI8:
8505     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8506   case OP_VSLDOI12:
8507     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8508   }
8509   EVT VT = OpLHS.getValueType();
8510   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8511   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8512   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8513   return DAG.getNode(ISD::BITCAST, dl, VT, T);
8514 }
8515 
8516 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8517 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8518 /// SDValue.
8519 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8520                                            SelectionDAG &DAG) const {
8521   const unsigned BytesInVector = 16;
8522   bool IsLE = Subtarget.isLittleEndian();
8523   SDLoc dl(N);
8524   SDValue V1 = N->getOperand(0);
8525   SDValue V2 = N->getOperand(1);
8526   unsigned ShiftElts = 0, InsertAtByte = 0;
8527   bool Swap = false;
8528 
8529   // Shifts required to get the byte we want at element 7.
8530   unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
8531                                    0, 15, 14, 13, 12, 11, 10, 9};
8532   unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
8533                                 1, 2,  3,  4,  5,  6,  7,  8};
8534 
8535   ArrayRef<int> Mask = N->getMask();
8536   int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8537 
8538   // For each mask element, find out if we're just inserting something
8539   // from V2 into V1 or vice versa.
8540   // Possible permutations inserting an element from V2 into V1:
8541   //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8542   //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8543   //   ...
8544   //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8545   // Inserting from V1 into V2 will be similar, except mask range will be
8546   // [16,31].
8547 
8548   bool FoundCandidate = false;
8549   // If both vector operands for the shuffle are the same vector, the mask
8550   // will contain only elements from the first one and the second one will be
8551   // undef.
8552   unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
8553   // Go through the mask of half-words to find an element that's being moved
8554   // from one vector to the other.
8555   for (unsigned i = 0; i < BytesInVector; ++i) {
8556     unsigned CurrentElement = Mask[i];
8557     // If 2nd operand is undefined, we should only look for element 7 in the
8558     // Mask.
8559     if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
8560       continue;
8561 
8562     bool OtherElementsInOrder = true;
8563     // Examine the other elements in the Mask to see if they're in original
8564     // order.
8565     for (unsigned j = 0; j < BytesInVector; ++j) {
8566       if (j == i)
8567         continue;
8568       // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
8569       // from V2 [16,31] and vice versa.  Unless the 2nd operand is undefined,
8570       // in which we always assume we're always picking from the 1st operand.
8571       int MaskOffset =
8572           (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
8573       if (Mask[j] != OriginalOrder[j] + MaskOffset) {
8574         OtherElementsInOrder = false;
8575         break;
8576       }
8577     }
8578     // If other elements are in original order, we record the number of shifts
8579     // we need to get the element we want into element 7. Also record which byte
8580     // in the vector we should insert into.
8581     if (OtherElementsInOrder) {
8582       // If 2nd operand is undefined, we assume no shifts and no swapping.
8583       if (V2.isUndef()) {
8584         ShiftElts = 0;
8585         Swap = false;
8586       } else {
8587         // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
8588         ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
8589                          : BigEndianShifts[CurrentElement & 0xF];
8590         Swap = CurrentElement < BytesInVector;
8591       }
8592       InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
8593       FoundCandidate = true;
8594       break;
8595     }
8596   }
8597 
8598   if (!FoundCandidate)
8599     return SDValue();
8600 
8601   // Candidate found, construct the proper SDAG sequence with VINSERTB,
8602   // optionally with VECSHL if shift is required.
8603   if (Swap)
8604     std::swap(V1, V2);
8605   if (V2.isUndef())
8606     V2 = V1;
8607   if (ShiftElts) {
8608     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8609                               DAG.getConstant(ShiftElts, dl, MVT::i32));
8610     return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
8611                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
8612   }
8613   return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
8614                      DAG.getConstant(InsertAtByte, dl, MVT::i32));
8615 }
8616 
8617 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
8618 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
8619 /// SDValue.
8620 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
8621                                            SelectionDAG &DAG) const {
8622   const unsigned NumHalfWords = 8;
8623   const unsigned BytesInVector = NumHalfWords * 2;
8624   // Check that the shuffle is on half-words.
8625   if (!isNByteElemShuffleMask(N, 2, 1))
8626     return SDValue();
8627 
8628   bool IsLE = Subtarget.isLittleEndian();
8629   SDLoc dl(N);
8630   SDValue V1 = N->getOperand(0);
8631   SDValue V2 = N->getOperand(1);
8632   unsigned ShiftElts = 0, InsertAtByte = 0;
8633   bool Swap = false;
8634 
8635   // Shifts required to get the half-word we want at element 3.
8636   unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
8637   unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
8638 
8639   uint32_t Mask = 0;
8640   uint32_t OriginalOrderLow = 0x1234567;
8641   uint32_t OriginalOrderHigh = 0x89ABCDEF;
8642   // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
8643   // 32-bit space, only need 4-bit nibbles per element.
8644   for (unsigned i = 0; i < NumHalfWords; ++i) {
8645     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8646     Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
8647   }
8648 
8649   // For each mask element, find out if we're just inserting something
8650   // from V2 into V1 or vice versa.  Possible permutations inserting an element
8651   // from V2 into V1:
8652   //   X, 1, 2, 3, 4, 5, 6, 7
8653   //   0, X, 2, 3, 4, 5, 6, 7
8654   //   0, 1, X, 3, 4, 5, 6, 7
8655   //   0, 1, 2, X, 4, 5, 6, 7
8656   //   0, 1, 2, 3, X, 5, 6, 7
8657   //   0, 1, 2, 3, 4, X, 6, 7
8658   //   0, 1, 2, 3, 4, 5, X, 7
8659   //   0, 1, 2, 3, 4, 5, 6, X
8660   // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
8661 
8662   bool FoundCandidate = false;
8663   // Go through the mask of half-words to find an element that's being moved
8664   // from one vector to the other.
8665   for (unsigned i = 0; i < NumHalfWords; ++i) {
8666     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8667     uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
8668     uint32_t MaskOtherElts = ~(0xF << MaskShift);
8669     uint32_t TargetOrder = 0x0;
8670 
8671     // If both vector operands for the shuffle are the same vector, the mask
8672     // will contain only elements from the first one and the second one will be
8673     // undef.
8674     if (V2.isUndef()) {
8675       ShiftElts = 0;
8676       unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
8677       TargetOrder = OriginalOrderLow;
8678       Swap = false;
8679       // Skip if not the correct element or mask of other elements don't equal
8680       // to our expected order.
8681       if (MaskOneElt == VINSERTHSrcElem &&
8682           (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8683         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8684         FoundCandidate = true;
8685         break;
8686       }
8687     } else { // If both operands are defined.
8688       // Target order is [8,15] if the current mask is between [0,7].
8689       TargetOrder =
8690           (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
8691       // Skip if mask of other elements don't equal our expected order.
8692       if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8693         // We only need the last 3 bits for the number of shifts.
8694         ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
8695                          : BigEndianShifts[MaskOneElt & 0x7];
8696         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8697         Swap = MaskOneElt < NumHalfWords;
8698         FoundCandidate = true;
8699         break;
8700       }
8701     }
8702   }
8703 
8704   if (!FoundCandidate)
8705     return SDValue();
8706 
8707   // Candidate found, construct the proper SDAG sequence with VINSERTH,
8708   // optionally with VECSHL if shift is required.
8709   if (Swap)
8710     std::swap(V1, V2);
8711   if (V2.isUndef())
8712     V2 = V1;
8713   SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8714   if (ShiftElts) {
8715     // Double ShiftElts because we're left shifting on v16i8 type.
8716     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8717                               DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
8718     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
8719     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8720                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
8721     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8722   }
8723   SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
8724   SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8725                             DAG.getConstant(InsertAtByte, dl, MVT::i32));
8726   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8727 }
8728 
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
///
/// The lowering strategies below are tried strictly in order and the first
/// one that matches wins:
///   1. word insert (P9 vector), then half-word / byte insert (P9 Altivec);
///   2. VSX word shift (VECSHL) and double-word permute (XXPERMDI);
///   3. P9 vector byte reversal (XXREVERSE);
///   4. VSX word splat and double-word swap for single-input shuffles;
///   5. the QPX path, which returns unconditionally once entered;
///   6. masks left as VECTOR_SHUFFLE for the instruction selector;
///   7. the perfect-shuffle table (big endian only);
///   8. a VPERM with a constant byte mask.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  // Out-parameters filled in by the mask predicates below.
  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;
  // Word insert: optionally shift the source (V2) by ShiftElts words, then
  // insert it into V1 at byte offset InsertAtByte.
  // NOTE(review): this and the following fast paths bitcast their result to
  // v16i8 rather than VT; presumably they only match v16i8 shuffles - confirm.
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  // Half-word insert is tried before byte insert; each helper returns a null
  // SDValue when the mask does not fit.
  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  // Word-granularity shift across the two inputs.  With an undef V2 the first
  // input is used for both operands.
  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  // Double-word permute.  ShiftElts is reused here to carry the immediate
  // produced by the mask predicate.
  if (Subtarget.hasVSX() &&
    PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  // Masks accepted by the isXXBR{H,W,D,Q}ShuffleMask predicates become a
  // single XXREVERSE node on V1, viewed with the matching element type
  // (v8i16, v4i32, v2i64 or v1i128).
  if (Subtarget.hasP9Vector()) {
     if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  // Single-input VSX special cases: a word splat and a double-word swap.
  if (Subtarget.hasVSX()) {
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  // QPX path.  Once entered, this block always returns: shuffles that do not
  // have exactly four elements are rejected with a null SDValue, and the
  // remaining ones become QVALIGNI, QVESPLATI or a QVGPCI/QVFPERM pair.
  if (Subtarget.hasQPX()) {
    if (VT.getVectorNumElements() != 4)
      return SDValue();

    if (V2.isUndef()) V2 = V1;

    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
    if (AlignIdx != -1) {
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
    } else if (SVOp->isSplat()) {
      int SplatIdx = SVOp->getSplatIndex();
      // A splat index of 4 or more refers to the second input; switch inputs
      // and rebase the index into [0,3].
      if (SplatIdx >= 4) {
        std::swap(V1, V2);
        SplatIdx -= 4;
      }

      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
    }

    // Lower this into a qvgpci/qvfperm pair.

    // Compute the qvgpci literal: three bits per mask element, with element 0
    // in the most significant position.  Undef mask elements are encoded as
    // their own position.
    unsigned idx = 0;
    for (unsigned i = 0; i < 4; ++i) {
      int m = SVOp->getMaskElt(i);
      unsigned mm = m >= 0 ? (unsigned) m : i;
      idx |= mm << (3-i)*3;
    }

    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
                             DAG.getConstant(idx, dl, MVT::i32));
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  // The single-input forms are queried with shuffle kind 1.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  // The two-input forms are queried with shuffle kind 2 on little endian and
  // 0 on big endian.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  // For each of the four result words, PFIndexes holds the source word number
  // (0-7), or 8 when all four of its bytes are undef.  The shuffle only
  // qualifies if every defined byte keeps its position within the word and
  // all defined bytes of a result word come from the same source word.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table: the four word indexes
    // form a base-9 number, first word most significant.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    // The cost is stored in the top two bits of the table entry.
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    // NOTE(review): the test below accepts stored cost values 0-2 only; whether
    // that corresponds to "3 or fewer operations" depends on how the table
    // encodes cost - confirm against the table generator.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  // Expand every mask element into BytesPerElement consecutive byte selectors.
  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    // Undef mask elements are arbitrarily mapped to source element 0.
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  // Little endian passes the inputs in reverse order (see the note above).
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}
9006 
9007 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9008 /// vector comparison.  If it is, return true and fill in Opc/isDot with
9009 /// information about the intrinsic.
9010 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
9011                                  bool &isDot, const PPCSubtarget &Subtarget) {
9012   unsigned IntrinsicID =
9013       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9014   CompareOpc = -1;
9015   isDot = false;
9016   switch (IntrinsicID) {
9017   default:
9018     return false;
9019   // Comparison predicates.
9020   case Intrinsic::ppc_altivec_vcmpbfp_p:
9021     CompareOpc = 966;
9022     isDot = true;
9023     break;
9024   case Intrinsic::ppc_altivec_vcmpeqfp_p:
9025     CompareOpc = 198;
9026     isDot = true;
9027     break;
9028   case Intrinsic::ppc_altivec_vcmpequb_p:
9029     CompareOpc = 6;
9030     isDot = true;
9031     break;
9032   case Intrinsic::ppc_altivec_vcmpequh_p:
9033     CompareOpc = 70;
9034     isDot = true;
9035     break;
9036   case Intrinsic::ppc_altivec_vcmpequw_p:
9037     CompareOpc = 134;
9038     isDot = true;
9039     break;
9040   case Intrinsic::ppc_altivec_vcmpequd_p:
9041     if (Subtarget.hasP8Altivec()) {
9042       CompareOpc = 199;
9043       isDot = true;
9044     } else
9045       return false;
9046     break;
9047   case Intrinsic::ppc_altivec_vcmpneb_p:
9048   case Intrinsic::ppc_altivec_vcmpneh_p:
9049   case Intrinsic::ppc_altivec_vcmpnew_p:
9050   case Intrinsic::ppc_altivec_vcmpnezb_p:
9051   case Intrinsic::ppc_altivec_vcmpnezh_p:
9052   case Intrinsic::ppc_altivec_vcmpnezw_p:
9053     if (Subtarget.hasP9Altivec()) {
9054       switch (IntrinsicID) {
9055       default:
9056         llvm_unreachable("Unknown comparison intrinsic.");
9057       case Intrinsic::ppc_altivec_vcmpneb_p:
9058         CompareOpc = 7;
9059         break;
9060       case Intrinsic::ppc_altivec_vcmpneh_p:
9061         CompareOpc = 71;
9062         break;
9063       case Intrinsic::ppc_altivec_vcmpnew_p:
9064         CompareOpc = 135;
9065         break;
9066       case Intrinsic::ppc_altivec_vcmpnezb_p:
9067         CompareOpc = 263;
9068         break;
9069       case Intrinsic::ppc_altivec_vcmpnezh_p:
9070         CompareOpc = 327;
9071         break;
9072       case Intrinsic::ppc_altivec_vcmpnezw_p:
9073         CompareOpc = 391;
9074         break;
9075       }
9076       isDot = true;
9077     } else
9078       return false;
9079     break;
9080   case Intrinsic::ppc_altivec_vcmpgefp_p:
9081     CompareOpc = 454;
9082     isDot = true;
9083     break;
9084   case Intrinsic::ppc_altivec_vcmpgtfp_p:
9085     CompareOpc = 710;
9086     isDot = true;
9087     break;
9088   case Intrinsic::ppc_altivec_vcmpgtsb_p:
9089     CompareOpc = 774;
9090     isDot = true;
9091     break;
9092   case Intrinsic::ppc_altivec_vcmpgtsh_p:
9093     CompareOpc = 838;
9094     isDot = true;
9095     break;
9096   case Intrinsic::ppc_altivec_vcmpgtsw_p:
9097     CompareOpc = 902;
9098     isDot = true;
9099     break;
9100   case Intrinsic::ppc_altivec_vcmpgtsd_p:
9101     if (Subtarget.hasP8Altivec()) {
9102       CompareOpc = 967;
9103       isDot = true;
9104     } else
9105       return false;
9106     break;
9107   case Intrinsic::ppc_altivec_vcmpgtub_p:
9108     CompareOpc = 518;
9109     isDot = true;
9110     break;
9111   case Intrinsic::ppc_altivec_vcmpgtuh_p:
9112     CompareOpc = 582;
9113     isDot = true;
9114     break;
9115   case Intrinsic::ppc_altivec_vcmpgtuw_p:
9116     CompareOpc = 646;
9117     isDot = true;
9118     break;
9119   case Intrinsic::ppc_altivec_vcmpgtud_p:
9120     if (Subtarget.hasP8Altivec()) {
9121       CompareOpc = 711;
9122       isDot = true;
9123     } else
9124       return false;
9125     break;
9126 
9127   // VSX predicate comparisons use the same infrastructure
9128   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9129   case Intrinsic::ppc_vsx_xvcmpgedp_p:
9130   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9131   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9132   case Intrinsic::ppc_vsx_xvcmpgesp_p:
9133   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9134     if (Subtarget.hasVSX()) {
9135       switch (IntrinsicID) {
9136       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9137         CompareOpc = 99;
9138         break;
9139       case Intrinsic::ppc_vsx_xvcmpgedp_p:
9140         CompareOpc = 115;
9141         break;
9142       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9143         CompareOpc = 107;
9144         break;
9145       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9146         CompareOpc = 67;
9147         break;
9148       case Intrinsic::ppc_vsx_xvcmpgesp_p:
9149         CompareOpc = 83;
9150         break;
9151       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9152         CompareOpc = 75;
9153         break;
9154       }
9155       isDot = true;
9156     } else
9157       return false;
9158     break;
9159 
9160   // Normal Comparisons.
9161   case Intrinsic::ppc_altivec_vcmpbfp:
9162     CompareOpc = 966;
9163     break;
9164   case Intrinsic::ppc_altivec_vcmpeqfp:
9165     CompareOpc = 198;
9166     break;
9167   case Intrinsic::ppc_altivec_vcmpequb:
9168     CompareOpc = 6;
9169     break;
9170   case Intrinsic::ppc_altivec_vcmpequh:
9171     CompareOpc = 70;
9172     break;
9173   case Intrinsic::ppc_altivec_vcmpequw:
9174     CompareOpc = 134;
9175     break;
9176   case Intrinsic::ppc_altivec_vcmpequd:
9177     if (Subtarget.hasP8Altivec())
9178       CompareOpc = 199;
9179     else
9180       return false;
9181     break;
9182   case Intrinsic::ppc_altivec_vcmpneb:
9183   case Intrinsic::ppc_altivec_vcmpneh:
9184   case Intrinsic::ppc_altivec_vcmpnew:
9185   case Intrinsic::ppc_altivec_vcmpnezb:
9186   case Intrinsic::ppc_altivec_vcmpnezh:
9187   case Intrinsic::ppc_altivec_vcmpnezw:
9188     if (Subtarget.hasP9Altivec())
9189       switch (IntrinsicID) {
9190       default:
9191         llvm_unreachable("Unknown comparison intrinsic.");
9192       case Intrinsic::ppc_altivec_vcmpneb:
9193         CompareOpc = 7;
9194         break;
9195       case Intrinsic::ppc_altivec_vcmpneh:
9196         CompareOpc = 71;
9197         break;
9198       case Intrinsic::ppc_altivec_vcmpnew:
9199         CompareOpc = 135;
9200         break;
9201       case Intrinsic::ppc_altivec_vcmpnezb:
9202         CompareOpc = 263;
9203         break;
9204       case Intrinsic::ppc_altivec_vcmpnezh:
9205         CompareOpc = 327;
9206         break;
9207       case Intrinsic::ppc_altivec_vcmpnezw:
9208         CompareOpc = 391;
9209         break;
9210       }
9211     else
9212       return false;
9213     break;
9214   case Intrinsic::ppc_altivec_vcmpgefp:
9215     CompareOpc = 454;
9216     break;
9217   case Intrinsic::ppc_altivec_vcmpgtfp:
9218     CompareOpc = 710;
9219     break;
9220   case Intrinsic::ppc_altivec_vcmpgtsb:
9221     CompareOpc = 774;
9222     break;
9223   case Intrinsic::ppc_altivec_vcmpgtsh:
9224     CompareOpc = 838;
9225     break;
9226   case Intrinsic::ppc_altivec_vcmpgtsw:
9227     CompareOpc = 902;
9228     break;
9229   case Intrinsic::ppc_altivec_vcmpgtsd:
9230     if (Subtarget.hasP8Altivec())
9231       CompareOpc = 967;
9232     else
9233       return false;
9234     break;
9235   case Intrinsic::ppc_altivec_vcmpgtub:
9236     CompareOpc = 518;
9237     break;
9238   case Intrinsic::ppc_altivec_vcmpgtuh:
9239     CompareOpc = 582;
9240     break;
9241   case Intrinsic::ppc_altivec_vcmpgtuw:
9242     CompareOpc = 646;
9243     break;
9244   case Intrinsic::ppc_altivec_vcmpgtud:
9245     if (Subtarget.hasP8Altivec())
9246       CompareOpc = 711;
9247     else
9248       return false;
9249     break;
9250   }
9251   return true;
9252 }
9253 
9254 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9255 /// lower, do it, otherwise return null.
9256 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9257                                                    SelectionDAG &DAG) const {
9258   unsigned IntrinsicID =
9259     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9260 
9261   SDLoc dl(Op);
9262 
9263   if (IntrinsicID == Intrinsic::thread_pointer) {
9264     // Reads the thread pointer register, used for __builtin_thread_pointer.
9265     if (Subtarget.isPPC64())
9266       return DAG.getRegister(PPC::X13, MVT::i64);
9267     return DAG.getRegister(PPC::R2, MVT::i32);
9268   }
9269 
9270   // If this is a lowered altivec predicate compare, CompareOpc is set to the
9271   // opcode number of the comparison.
9272   int CompareOpc;
9273   bool isDot;
9274   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9275     return SDValue();    // Don't custom lower most intrinsics.
9276 
9277   // If this is a non-dot comparison, make the VCMP node and we are done.
9278   if (!isDot) {
9279     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9280                               Op.getOperand(1), Op.getOperand(2),
9281                               DAG.getConstant(CompareOpc, dl, MVT::i32));
9282     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9283   }
9284 
9285   // Create the PPCISD altivec 'dot' comparison node.
9286   SDValue Ops[] = {
9287     Op.getOperand(2),  // LHS
9288     Op.getOperand(3),  // RHS
9289     DAG.getConstant(CompareOpc, dl, MVT::i32)
9290   };
9291   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9292   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
9293 
9294   // Now that we have the comparison, emit a copy from the CR to a GPR.
9295   // This is flagged to the above dot comparison.
9296   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9297                                 DAG.getRegister(PPC::CR6, MVT::i32),
9298                                 CompNode.getValue(1));
9299 
9300   // Unpack the result based on how the target uses it.
9301   unsigned BitNo;   // Bit # of CR6.
9302   bool InvertBit;   // Invert result?
9303   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9304   default:  // Can't happen, don't crash on invalid number though.
9305   case 0:   // Return the value of the EQ bit of CR6.
9306     BitNo = 0; InvertBit = false;
9307     break;
9308   case 1:   // Return the inverted value of the EQ bit of CR6.
9309     BitNo = 0; InvertBit = true;
9310     break;
9311   case 2:   // Return the value of the LT bit of CR6.
9312     BitNo = 2; InvertBit = false;
9313     break;
9314   case 3:   // Return the inverted value of the LT bit of CR6.
9315     BitNo = 2; InvertBit = true;
9316     break;
9317   }
9318 
9319   // Shift the bit into the low position.
9320   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9321                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9322   // Isolate the bit.
9323   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9324                       DAG.getConstant(1, dl, MVT::i32));
9325 
9326   // If we are supposed to, toggle the bit.
9327   if (InvertBit)
9328     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9329                         DAG.getConstant(1, dl, MVT::i32));
9330   return Flags;
9331 }
9332 
9333 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9334                                                SelectionDAG &DAG) const {
9335   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
9336   // the beginning of the argument list.
9337   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
9338   SDLoc DL(Op);
9339   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
9340   case Intrinsic::ppc_cfence: {
9341     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
9342     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
9343     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
9344                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
9345                                                   Op.getOperand(ArgStart + 1)),
9346                                       Op.getOperand(0)),
9347                    0);
9348   }
9349   default:
9350     break;
9351   }
9352   return SDValue();
9353 }
9354 
9355 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
9356   // Check for a DIV with the same operands as this REM.
9357   for (auto UI : Op.getOperand(1)->uses()) {
9358     if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
9359         (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
9360       if (UI->getOperand(0) == Op.getOperand(0) &&
9361           UI->getOperand(1) == Op.getOperand(1))
9362         return SDValue();
9363   }
9364   return Op;
9365 }
9366 
9367 // Lower scalar BSWAP64 to xxbrd.
9368 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
9369   SDLoc dl(Op);
9370   // MTVSRDD
9371   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
9372                    Op.getOperand(0));
9373   // XXBRD
9374   Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
9375   // MFVSRD
9376   int VectorIndex = 0;
9377   if (Subtarget.isLittleEndian())
9378     VectorIndex = 1;
9379   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
9380                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
9381   return Op;
9382 }
9383 
9384 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
9385 // compared to a value that is atomically loaded (atomic loads zero-extend).
9386 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
9387                                                 SelectionDAG &DAG) const {
9388   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
9389          "Expecting an atomic compare-and-swap here.");
9390   SDLoc dl(Op);
9391   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
9392   EVT MemVT = AtomicNode->getMemoryVT();
9393   if (MemVT.getSizeInBits() >= 32)
9394     return Op;
9395 
9396   SDValue CmpOp = Op.getOperand(2);
9397   // If this is already correctly zero-extended, leave it alone.
9398   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
9399   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
9400     return Op;
9401 
9402   // Clear the high bits of the compare operand.
9403   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
9404   SDValue NewCmpOp =
9405     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
9406                 DAG.getConstant(MaskVal, dl, MVT::i32));
9407 
9408   // Replace the existing compare operand with the properly zero-extended one.
9409   SmallVector<SDValue, 4> Ops;
9410   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
9411     Ops.push_back(AtomicNode->getOperand(i));
9412   Ops[2] = NewCmpOp;
9413   MachineMemOperand *MMO = AtomicNode->getMemOperand();
9414   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
9415   auto NodeTy =
9416     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
9417   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
9418 }
9419 
9420 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
9421                                                  SelectionDAG &DAG) const {
9422   SDLoc dl(Op);
9423   // Create a stack slot that is 16-byte aligned.
9424   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9425   int FrameIdx = MFI.CreateStackObject(16, 16, false);
9426   EVT PtrVT = getPointerTy(DAG.getDataLayout());
9427   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9428 
9429   // Store the input value into Value#0 of the stack slot.
9430   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
9431                                MachinePointerInfo());
9432   // Load it out.
9433   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
9434 }
9435 
9436 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9437                                                   SelectionDAG &DAG) const {
9438   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
9439          "Should only be called for ISD::INSERT_VECTOR_ELT");
9440 
9441   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
9442   // We have legal lowering for constant indices but not for variable ones.
9443   if (!C)
9444     return SDValue();
9445 
9446   EVT VT = Op.getValueType();
9447   SDLoc dl(Op);
9448   SDValue V1 = Op.getOperand(0);
9449   SDValue V2 = Op.getOperand(1);
9450   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
9451   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
9452     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
9453     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
9454     unsigned InsertAtElement = C->getZExtValue();
9455     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
9456     if (Subtarget.isLittleEndian()) {
9457       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
9458     }
9459     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
9460                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
9461   }
9462   return Op;
9463 }
9464 
9465 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
9466                                                    SelectionDAG &DAG) const {
9467   SDLoc dl(Op);
9468   SDNode *N = Op.getNode();
9469 
9470   assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
9471          "Unknown extract_vector_elt type");
9472 
9473   SDValue Value = N->getOperand(0);
9474 
9475   // The first part of this is like the store lowering except that we don't
9476   // need to track the chain.
9477 
9478   // The values are now known to be -1 (false) or 1 (true). To convert this
9479   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9480   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9481   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
9482 
9483   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9484   // understand how to form the extending load.
9485   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
9486 
9487   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
9488 
9489   // Now convert to an integer and store.
9490   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9491     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
9492     Value);
9493 
9494   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9495   int FrameIdx = MFI.CreateStackObject(16, 16, false);
9496   MachinePointerInfo PtrInfo =
9497       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9498   EVT PtrVT = getPointerTy(DAG.getDataLayout());
9499   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9500 
9501   SDValue StoreChain = DAG.getEntryNode();
9502   SDValue Ops[] = {StoreChain,
9503                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
9504                    Value, FIdx};
9505   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
9506 
9507   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
9508     dl, VTs, Ops, MVT::v4i32, PtrInfo);
9509 
9510   // Extract the value requested.
9511   unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
9512   SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9513   Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9514 
9515   SDValue IntVal =
9516       DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
9517 
9518   if (!Subtarget.useCRBits())
9519     return IntVal;
9520 
9521   return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
9522 }
9523 
/// Custom lowering for QPX vector loads.
///
/// Two kinds of load are handled:
///  - v4f64 / v4f32: a load whose alignment is at least the store size of its
///    memory type is returned unchanged. Otherwise it is split into four
///    scalar loads whose results are recombined with a BUILD_VECTOR.
///  - v4i1: four byte loads (extended to i32) feed a v4i1 BUILD_VECTOR.
///
/// The returned value merges the loaded vector with the combined chain; for
/// an indexed v4f64/v4f32 load a third value is merged in between (see below).
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    // Under-aligned: fall back to one scalar load per element. Stride is the
    // in-memory size of one element.
    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      // When the in-register and in-memory scalar types differ, keep the
      // original load's extension kind; otherwise a plain load suffices.
      // All four loads hang off the original incoming chain.
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      // Only the first element load carries the indexing of the original
      // load; pre-increment is the only addressing mode expected here.
      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      // NOTE(review): for the indexed case, result #1 of element 0 is also
      // returned separately below as its own merged value, so it presumably
      // is not a chain there -- confirm the result collected here is the
      // intended chain for indexed loads.
      LoadChains[Idx] = Load.getValue(1);

      // Step the address to the next element for the following iteration.
      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    // Join the per-element chains and rebuild the vector.
    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    // An indexed load additionally returns result #1 of the first element
    // load, placed between the vector value and the chain.
    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    // Element i lives at byte offset i from the base pointer.
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    // Load the byte as an i32 (EXTLOAD) with byte alignment.
    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  // Join the four load chains and assemble the v4i1 value.
  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}
9611 
/// Custom lowering for QPX vector stores.
///
/// Two kinds of store are handled:
///  - v4f64 / v4f32: a store whose alignment is at least the store size of
///    its memory type is returned unchanged. Otherwise each element is
///    extracted and stored on its own, and the stores are joined with a
///    TokenFactor.
///  - v4i1: the booleans are converted to integer words, spilled to a stack
///    slot, reloaded word by word and written out as four individual bytes.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    // Under-aligned: fall back to one scalar store per element. Stride is the
    // in-memory size of one element.
    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      // Pull element Idx out of the vector being stored.
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      // A truncating store is needed when the in-memory scalar type differs
      // from the in-register one. All four stores hang off the original
      // incoming chain.
      if (ScalarVT != ScalarMemVT)
        Store =
            DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                              SN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
      else
        Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
                             SN->getPointerInfo().getWithOffset(Idx * Stride),
                             MinAlign(Alignment, Idx * Stride),
                             SN->getMemOperand()->getFlags(), SN->getAAInfo());

      // Only the first element store carries the indexing of the original
      // store; pre-increment is the only addressing mode expected here.
      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      // Step the address to the next element for the following iteration.
      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    // Join the per-element stores.
    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // An indexed store additionally returns result #1 of the first element
    // store.
    // NOTE(review): the result numbering of the indexed store node is not
    // visible here -- confirm that result #0 (fed to the TokenFactor above)
    // and result #1 (returned here) are the chain and the value callers
    // expect in these positions.
    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  // Temporary 16-byte, 16-byte-aligned stack object that receives the four
  // integer words.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Spill through the qvstfiw intrinsic, chained on the original store's
  // incoming chain.
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array.
  // First read the four words back from the stack slot (word i is at byte
  // offset 4*i); every load is chained on the spill.
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                           PtrInfo.getWithOffset(Offset));
    LoadChains[i] = Loads[i].getValue(1);
  }

  // All reloads must complete before the byte stores are issued.
  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  // Then write each word, truncated to i8, to byte i of the destination with
  // byte alignment.
  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
        SN->getAAInfo());
  }

  // The result of the lowering is the chain joining the four byte stores.
  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}
9738 
9739 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
9740   SDLoc dl(Op);
9741   if (Op.getValueType() == MVT::v4i32) {
9742     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9743 
9744     SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
9745     SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
9746 
9747     SDValue RHSSwap =   // = vrlw RHS, 16
9748       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
9749 
9750     // Shrinkify inputs to v8i16.
9751     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
9752     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
9753     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
9754 
9755     // Low parts multiplied together, generating 32-bit results (we ignore the
9756     // top parts).
9757     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
9758                                         LHS, RHS, DAG, dl, MVT::v4i32);
9759 
9760     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
9761                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
9762     // Shift the high parts up 16 bits.
9763     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
9764                               Neg16, DAG, dl);
9765     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
9766   } else if (Op.getValueType() == MVT::v8i16) {
9767     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9768 
9769     SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
9770 
9771     return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
9772                             LHS, RHS, Zero, DAG, dl);
9773   } else if (Op.getValueType() == MVT::v16i8) {
9774     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9775     bool isLittleEndian = Subtarget.isLittleEndian();
9776 
9777     // Multiply the even 8-bit parts, producing 16-bit sums.
9778     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
9779                                            LHS, RHS, DAG, dl, MVT::v8i16);
9780     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
9781 
9782     // Multiply the odd 8-bit parts, producing 16-bit sums.
9783     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
9784                                           LHS, RHS, DAG, dl, MVT::v8i16);
9785     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
9786 
9787     // Merge the results together.  Because vmuleub and vmuloub are
9788     // instructions with a big-endian bias, we must reverse the
9789     // element numbering and reverse the meaning of "odd" and "even"
9790     // when generating little endian code.
9791     int Ops[16];
9792     for (unsigned i = 0; i != 8; ++i) {
9793       if (isLittleEndian) {
9794         Ops[i*2  ] = 2*i;
9795         Ops[i*2+1] = 2*i+16;
9796       } else {
9797         Ops[i*2  ] = 2*i+1;
9798         Ops[i*2+1] = 2*i+1+16;
9799       }
9800     }
9801     if (isLittleEndian)
9802       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
9803     else
9804       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
9805   } else {
9806     llvm_unreachable("Unknown mul to lower!");
9807   }
9808 }
9809 
9810 SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
9811 
9812   assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
9813 
9814   EVT VT = Op.getValueType();
9815   assert(VT.isVector() &&
9816          "Only set vector abs as custom, scalar abs shouldn't reach here!");
9817   assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
9818           VT == MVT::v16i8) &&
9819          "Unexpected vector element type!");
9820   assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
9821          "Current subtarget doesn't support smax v2i64!");
9822 
9823   // For vector abs, it can be lowered to:
9824   // abs x
9825   // ==>
9826   // y = -x
9827   // smax(x, y)
9828 
9829   SDLoc dl(Op);
9830   SDValue X = Op.getOperand(0);
9831   SDValue Zero = DAG.getConstant(0, dl, VT);
9832   SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
9833 
9834   // SMAX patch https://reviews.llvm.org/D47332
9835   // hasn't landed yet, so use intrinsic first here.
9836   // TODO: Should use SMAX directly once SMAX patch landed
9837   Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
9838   if (VT == MVT::v2i64)
9839     BifID = Intrinsic::ppc_altivec_vmaxsd;
9840   else if (VT == MVT::v8i16)
9841     BifID = Intrinsic::ppc_altivec_vmaxsh;
9842   else if (VT == MVT::v16i8)
9843     BifID = Intrinsic::ppc_altivec_vmaxsb;
9844 
9845   return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
9846 }
9847 
9848 // Custom lowering for fpext vf32 to v2f64
9849 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
9850 
9851   assert(Op.getOpcode() == ISD::FP_EXTEND &&
9852          "Should only be called for ISD::FP_EXTEND");
9853 
9854   // We only want to custom lower an extend from v2f32 to v2f64.
9855   if (Op.getValueType() != MVT::v2f64 ||
9856       Op.getOperand(0).getValueType() != MVT::v2f32)
9857     return SDValue();
9858 
9859   SDLoc dl(Op);
9860   SDValue Op0 = Op.getOperand(0);
9861 
9862   switch (Op0.getOpcode()) {
9863   default:
9864     return SDValue();
9865   case ISD::FADD:
9866   case ISD::FMUL:
9867   case ISD::FSUB: {
9868     SDValue NewLoad[2];
9869     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
9870       // Ensure both input are loads.
9871       SDValue LdOp = Op0.getOperand(i);
9872       if (LdOp.getOpcode() != ISD::LOAD)
9873         return SDValue();
9874       // Generate new load node.
9875       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
9876       SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
9877       NewLoad[i] =
9878         DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
9879                                 DAG.getVTList(MVT::v4f32, MVT::Other),
9880                                 LoadOps, LD->getMemoryVT(),
9881                                 LD->getMemOperand());
9882     }
9883     SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
9884                               NewLoad[0], NewLoad[1],
9885                               Op0.getNode()->getFlags());
9886     return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
9887   }
9888   case ISD::LOAD: {
9889     LoadSDNode *LD = cast<LoadSDNode>(Op0);
9890     SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
9891     SDValue NewLd =
9892       DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
9893                               DAG.getVTList(MVT::v4f32, MVT::Other),
9894                               LoadOps, LD->getMemoryVT(), LD->getMemOperand());
9895     return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
9896   }
9897   }
9898   llvm_unreachable("ERROR:Should return for all cases within swtich.");
9899 }
9900 
9901 /// LowerOperation - Provide custom lowering hooks for some operations.
9902 ///
9903 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
9904   switch (Op.getOpcode()) {
9905   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
9906   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
9907   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
9908   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
9909   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
9910   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
9911   case ISD::SETCC:              return LowerSETCC(Op, DAG);
9912   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
9913   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
9914 
9915   // Variable argument lowering.
9916   case ISD::VASTART:            return LowerVASTART(Op, DAG);
9917   case ISD::VAARG:              return LowerVAARG(Op, DAG);
9918   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
9919 
9920   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
9921   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
9922   case ISD::GET_DYNAMIC_AREA_OFFSET:
9923     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
9924 
9925   // Exception handling lowering.
9926   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
9927   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
9928   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
9929 
9930   case ISD::LOAD:               return LowerLOAD(Op, DAG);
9931   case ISD::STORE:              return LowerSTORE(Op, DAG);
9932   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
9933   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
9934   case ISD::FP_TO_UINT:
9935   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
9936   case ISD::UINT_TO_FP:
9937   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
9938   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
9939 
9940   // Lower 64-bit shifts.
9941   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
9942   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
9943   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
9944 
9945   // Vector-related lowering.
9946   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
9947   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
9948   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
9949   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
9950   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
9951   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
9952   case ISD::MUL:                return LowerMUL(Op, DAG);
9953   case ISD::ABS:                return LowerABS(Op, DAG);
9954   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
9955 
9956   // For counter-based loop handling.
9957   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
9958 
9959   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
9960 
9961   // Frame & Return address.
9962   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
9963   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
9964 
9965   case ISD::INTRINSIC_VOID:
9966     return LowerINTRINSIC_VOID(Op, DAG);
9967   case ISD::SREM:
9968   case ISD::UREM:
9969     return LowerREM(Op, DAG);
9970   case ISD::BSWAP:
9971     return LowerBSWAP(Op, DAG);
9972   case ISD::ATOMIC_CMP_SWAP:
9973     return LowerATOMIC_CMP_SWAP(Op, DAG);
9974   }
9975 }
9976 
9977 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
9978                                            SmallVectorImpl<SDValue>&Results,
9979                                            SelectionDAG &DAG) const {
9980   SDLoc dl(N);
9981   switch (N->getOpcode()) {
9982   default:
9983     llvm_unreachable("Do not know how to custom type legalize this operation!");
9984   case ISD::READCYCLECOUNTER: {
9985     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
9986     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
9987 
9988     Results.push_back(RTB);
9989     Results.push_back(RTB.getValue(1));
9990     Results.push_back(RTB.getValue(2));
9991     break;
9992   }
9993   case ISD::INTRINSIC_W_CHAIN: {
9994     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
9995         Intrinsic::loop_decrement)
9996       break;
9997 
9998     assert(N->getValueType(0) == MVT::i1 &&
9999            "Unexpected result type for CTR decrement intrinsic");
10000     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10001                                  N->getValueType(0));
10002     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
10003     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10004                                  N->getOperand(1));
10005 
10006     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10007     Results.push_back(NewInt.getValue(1));
10008     break;
10009   }
10010   case ISD::VAARG: {
10011     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10012       return;
10013 
10014     EVT VT = N->getValueType(0);
10015 
10016     if (VT == MVT::i64) {
10017       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10018 
10019       Results.push_back(NewNode);
10020       Results.push_back(NewNode.getValue(1));
10021     }
10022     return;
10023   }
10024   case ISD::FP_TO_SINT:
10025   case ISD::FP_TO_UINT:
10026     // LowerFP_TO_INT() can only handle f32 and f64.
10027     if (N->getOperand(0).getValueType() == MVT::ppcf128)
10028       return;
10029     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10030     return;
10031   case ISD::TRUNCATE: {
10032     EVT TrgVT = N->getValueType(0);
10033     if (TrgVT.isVector() &&
10034         isOperationCustom(N->getOpcode(), TrgVT) &&
10035         N->getOperand(0).getValueType().getSizeInBits() <= 128)
10036       Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
10037     return;
10038   }
10039   case ISD::BITCAST:
10040     // Don't handle bitcast here.
10041     return;
10042   }
10043 }
10044 
10045 //===----------------------------------------------------------------------===//
10046 //  Other Lowering Code
10047 //===----------------------------------------------------------------------===//
10048 
10049 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10050   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10051   Function *Func = Intrinsic::getDeclaration(M, Id);
10052   return Builder.CreateCall(Func, {});
10053 }
10054 
10055 // The mappings for emitLeading/TrailingFence is taken from
10056 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10057 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10058                                                  Instruction *Inst,
10059                                                  AtomicOrdering Ord) const {
10060   if (Ord == AtomicOrdering::SequentiallyConsistent)
10061     return callIntrinsic(Builder, Intrinsic::ppc_sync);
10062   if (isReleaseOrStronger(Ord))
10063     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10064   return nullptr;
10065 }
10066 
10067 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10068                                                   Instruction *Inst,
10069                                                   AtomicOrdering Ord) const {
10070   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10071     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10072     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10073     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10074     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10075       return Builder.CreateCall(
10076           Intrinsic::getDeclaration(
10077               Builder.GetInsertBlock()->getParent()->getParent(),
10078               Intrinsic::ppc_cfence, {Inst->getType()}),
10079           {Inst});
10080     // FIXME: Can use isync for rmw operation.
10081     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10082   }
10083   return nullptr;
10084 }
10085 
10086 MachineBasicBlock *
10087 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
10088                                     unsigned AtomicSize,
10089                                     unsigned BinOpcode,
10090                                     unsigned CmpOpcode,
10091                                     unsigned CmpPred) const {
10092   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10093   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10094 
10095   auto LoadMnemonic = PPC::LDARX;
10096   auto StoreMnemonic = PPC::STDCX;
10097   switch (AtomicSize) {
10098   default:
10099     llvm_unreachable("Unexpected size of atomic entity");
10100   case 1:
10101     LoadMnemonic = PPC::LBARX;
10102     StoreMnemonic = PPC::STBCX;
10103     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10104     break;
10105   case 2:
10106     LoadMnemonic = PPC::LHARX;
10107     StoreMnemonic = PPC::STHCX;
10108     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10109     break;
10110   case 4:
10111     LoadMnemonic = PPC::LWARX;
10112     StoreMnemonic = PPC::STWCX;
10113     break;
10114   case 8:
10115     LoadMnemonic = PPC::LDARX;
10116     StoreMnemonic = PPC::STDCX;
10117     break;
10118   }
10119 
10120   const BasicBlock *LLVM_BB = BB->getBasicBlock();
10121   MachineFunction *F = BB->getParent();
10122   MachineFunction::iterator It = ++BB->getIterator();
10123 
10124   Register dest = MI.getOperand(0).getReg();
10125   Register ptrA = MI.getOperand(1).getReg();
10126   Register ptrB = MI.getOperand(2).getReg();
10127   Register incr = MI.getOperand(3).getReg();
10128   DebugLoc dl = MI.getDebugLoc();
10129 
10130   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10131   MachineBasicBlock *loop2MBB =
10132     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10133   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10134   F->insert(It, loopMBB);
10135   if (CmpOpcode)
10136     F->insert(It, loop2MBB);
10137   F->insert(It, exitMBB);
10138   exitMBB->splice(exitMBB->begin(), BB,
10139                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
10140   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10141 
10142   MachineRegisterInfo &RegInfo = F->getRegInfo();
10143   Register TmpReg = (!BinOpcode) ? incr :
10144     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10145                                            : &PPC::GPRCRegClass);
10146 
10147   //  thisMBB:
10148   //   ...
10149   //   fallthrough --> loopMBB
10150   BB->addSuccessor(loopMBB);
10151 
10152   //  loopMBB:
10153   //   l[wd]arx dest, ptr
10154   //   add r0, dest, incr
10155   //   st[wd]cx. r0, ptr
10156   //   bne- loopMBB
10157   //   fallthrough --> exitMBB
10158 
10159   // For max/min...
10160   //  loopMBB:
10161   //   l[wd]arx dest, ptr
10162   //   cmpl?[wd] incr, dest
10163   //   bgt exitMBB
10164   //  loop2MBB:
10165   //   st[wd]cx. dest, ptr
10166   //   bne- loopMBB
10167   //   fallthrough --> exitMBB
10168 
10169   BB = loopMBB;
10170   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10171     .addReg(ptrA).addReg(ptrB);
10172   if (BinOpcode)
10173     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10174   if (CmpOpcode) {
10175     // Signed comparisons of byte or halfword values must be sign-extended.
10176     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10177       unsigned ExtReg =  RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10178       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10179               ExtReg).addReg(dest);
10180       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10181         .addReg(incr).addReg(ExtReg);
10182     } else
10183       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10184         .addReg(incr).addReg(dest);
10185 
10186     BuildMI(BB, dl, TII->get(PPC::BCC))
10187       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10188     BB->addSuccessor(loop2MBB);
10189     BB->addSuccessor(exitMBB);
10190     BB = loop2MBB;
10191   }
10192   BuildMI(BB, dl, TII->get(StoreMnemonic))
10193     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10194   BuildMI(BB, dl, TII->get(PPC::BCC))
10195     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
10196   BB->addSuccessor(loopMBB);
10197   BB->addSuccessor(exitMBB);
10198 
10199   //  exitMBB:
10200   //   ...
10201   BB = exitMBB;
10202   return BB;
10203 }
10204 
/// Expand a byte or halfword atomic read-modify-write pseudo \p MI.
///
/// When the subtarget reports part-word atomics this simply forwards to
/// EmitAtomicBinary with a size of 1 (\p is8bit) or 2.  Otherwise the
/// operation is emulated with lwarx/stwcx. on the word containing the value,
/// using the shift and mask bookkeeping set up in the original block.
///
/// \param MI        Pseudo whose operands are read as: 0 = dest, 1 = ptrA,
///                  2 = ptrB, 3 = incr.
/// \param BB        Block containing \p MI; everything after \p MI is moved
///                  into the newly created exit block.
/// \param is8bit    True for a byte operation, false for a halfword.
/// \param BinOpcode Opcode applied to the shifted operand and the loaded word;
///                  zero denotes ATOMIC_SWAP.
/// \param CmpOpcode When non-zero, a compare is emitted and the loop branches
///                  to the exit block on \p CmpPred without storing.
/// \param CmpPred   Predicate immediate for that early-exit branch.
/// \returns the exit block, which begins with the shift that produces dest.
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // loop2MBB is only created for the compare form (CmpOpcode != 0).
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // Move everything after MI, and BB's successor edges, to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // RC is the pointer-width class; GPRC is used for all 32-bit temporaries.
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  // On little-endian targets no XORI is emitted below, so the shift amount is
  // Shift1Reg itself.
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  Register Ptr1Reg;
  // For a swap (BinOpcode == 0) the shifted operand is what gets merged into
  // the word; otherwise a fresh register holds the BinOpcode result.
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  // Clear the two low address bits to obtain the word-aligned pointer.
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  // Shift the operand and the 0xFF / 0xFFFF mask into position in the word.
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  // loopMBB: load the word, apply the operation, and split the result into
  // the untouched bits (Tmp2Reg) and the updated field (Tmp3Reg).
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    unsigned SReg = RegInfo.createVirtualRegister(GPRC);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      // Signed compare: shift the field back down, sign-extend it, and
      // compare against the unshifted operand.
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(CmpReg)
        .addReg(ValueReg);
    // Leave for exitMBB without storing when CmpPred holds.
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  // Merge the updated field with the untouched bits and attempt the store;
  // branch back to loopMBB when CR0 reports "ne".
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  // dest is the loaded word shifted right by ShiftReg; the shift is inserted
  // at the very start of exitMBB.
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}
10397 
/// Expand the EH_SjLj_SetJmp pseudo \p MI located in \p MBB.
///
/// Operand 0 of \p MI is the result register and operand 1 the register
/// holding the buffer address.  The expansion stores X2 (on PPC64 SVR4) and
/// the base register into the buffer, emits a BCLalways to a new mainMBB,
/// and in mainMBB stores the link register value into the buffer.  The result
/// is a PHI in sinkMBB of 0 (coming from mainMBB) and 1 (coming from the
/// original block).  \p MI is erased.
///
/// \returns sinkMBB, which receives the instructions that followed \p MI.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  unsigned DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  // One value per incoming edge of the PHI built in sinkMBB.
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  // Buffer offsets, in multiples of the pointer store size.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // LabelReg will receive the link register value in mainMBB.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI.getOperand(1).getReg();

  // Save the TOC pointer (X2) into the buffer on PPC64 SVR4.
  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  // Save the base register into the buffer at BPOffset.
  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  // Setup
  // The BCLalways carries the no-preserved register mask.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  // Read the link register into LabelReg.
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  // Merge the two result values: 0 from mainMBB, 1 from thisMBB.
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
10539 
10540 MachineBasicBlock *
10541 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
10542                                      MachineBasicBlock *MBB) const {
10543   DebugLoc DL = MI.getDebugLoc();
10544   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10545 
10546   MachineFunction *MF = MBB->getParent();
10547   MachineRegisterInfo &MRI = MF->getRegInfo();
10548 
10549   MVT PVT = getPointerTy(MF->getDataLayout());
10550   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10551          "Invalid Pointer Size!");
10552 
10553   const TargetRegisterClass *RC =
10554     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10555   unsigned Tmp = MRI.createVirtualRegister(RC);
10556   // Since FP is only updated here but NOT referenced, it's treated as GPR.
10557   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
10558   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
10559   unsigned BP =
10560       (PVT == MVT::i64)
10561           ? PPC::X30
10562           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
10563                                                               : PPC::R30);
10564 
10565   MachineInstrBuilder MIB;
10566 
10567   const int64_t LabelOffset = 1 * PVT.getStoreSize();
10568   const int64_t SPOffset    = 2 * PVT.getStoreSize();
10569   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
10570   const int64_t BPOffset    = 4 * PVT.getStoreSize();
10571 
10572   unsigned BufReg = MI.getOperand(0).getReg();
10573 
10574   // Reload FP (the jumped-to function may not have had a
10575   // frame pointer, and if so, then its r31 will be restored
10576   // as necessary).
10577   if (PVT == MVT::i64) {
10578     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
10579             .addImm(0)
10580             .addReg(BufReg);
10581   } else {
10582     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
10583             .addImm(0)
10584             .addReg(BufReg);
10585   }
10586   MIB.cloneMemRefs(MI);
10587 
10588   // Reload IP
10589   if (PVT == MVT::i64) {
10590     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
10591             .addImm(LabelOffset)
10592             .addReg(BufReg);
10593   } else {
10594     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
10595             .addImm(LabelOffset)
10596             .addReg(BufReg);
10597   }
10598   MIB.cloneMemRefs(MI);
10599 
10600   // Reload SP
10601   if (PVT == MVT::i64) {
10602     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
10603             .addImm(SPOffset)
10604             .addReg(BufReg);
10605   } else {
10606     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
10607             .addImm(SPOffset)
10608             .addReg(BufReg);
10609   }
10610   MIB.cloneMemRefs(MI);
10611 
10612   // Reload BP
10613   if (PVT == MVT::i64) {
10614     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
10615             .addImm(BPOffset)
10616             .addReg(BufReg);
10617   } else {
10618     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
10619             .addImm(BPOffset)
10620             .addReg(BufReg);
10621   }
10622   MIB.cloneMemRefs(MI);
10623 
10624   // Reload TOC
10625   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
10626     setUsesTOCBasePtr(*MBB->getParent());
10627     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
10628               .addImm(TOCOffset)
10629               .addReg(BufReg)
10630               .cloneMemRefs(MI);
10631   }
10632 
10633   // Jump
10634   BuildMI(*MBB, MI, DL,
10635           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
10636   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
10637 
10638   MI.eraseFromParent();
10639   return MBB;
10640 }
10641 
10642 MachineBasicBlock *
10643 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
10644                                                MachineBasicBlock *BB) const {
10645   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
10646       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10647     if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
10648         MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10649       // Call lowering should have added an r2 operand to indicate a dependence
10650       // on the TOC base pointer value. It can't however, because there is no
10651       // way to mark the dependence as implicit there, and so the stackmap code
10652       // will confuse it with a regular operand. Instead, add the dependence
10653       // here.
10654       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
10655     }
10656 
10657     return emitPatchPoint(MI, BB);
10658   }
10659 
10660   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
10661       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
10662     return emitEHSjLjSetJmp(MI, BB);
10663   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
10664              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
10665     return emitEHSjLjLongJmp(MI, BB);
10666   }
10667 
10668   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10669 
10670   // To "insert" these instructions we actually have to insert their
10671   // control-flow patterns.
10672   const BasicBlock *LLVM_BB = BB->getBasicBlock();
10673   MachineFunction::iterator It = ++BB->getIterator();
10674 
10675   MachineFunction *F = BB->getParent();
10676 
10677   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10678       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
10679       MI.getOpcode() == PPC::SELECT_I8) {
10680     SmallVector<MachineOperand, 2> Cond;
10681     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10682         MI.getOpcode() == PPC::SELECT_CC_I8)
10683       Cond.push_back(MI.getOperand(4));
10684     else
10685       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
10686     Cond.push_back(MI.getOperand(1));
10687 
10688     DebugLoc dl = MI.getDebugLoc();
10689     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
10690                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
10691   } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10692              MI.getOpcode() == PPC::SELECT_CC_I8 ||
10693              MI.getOpcode() == PPC::SELECT_CC_F4 ||
10694              MI.getOpcode() == PPC::SELECT_CC_F8 ||
10695              MI.getOpcode() == PPC::SELECT_CC_F16 ||
10696              MI.getOpcode() == PPC::SELECT_CC_QFRC ||
10697              MI.getOpcode() == PPC::SELECT_CC_QSRC ||
10698              MI.getOpcode() == PPC::SELECT_CC_QBRC ||
10699              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
10700              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
10701              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
10702              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
10703              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
10704              MI.getOpcode() == PPC::SELECT_CC_SPE ||
10705              MI.getOpcode() == PPC::SELECT_I4 ||
10706              MI.getOpcode() == PPC::SELECT_I8 ||
10707              MI.getOpcode() == PPC::SELECT_F4 ||
10708              MI.getOpcode() == PPC::SELECT_F8 ||
10709              MI.getOpcode() == PPC::SELECT_F16 ||
10710              MI.getOpcode() == PPC::SELECT_QFRC ||
10711              MI.getOpcode() == PPC::SELECT_QSRC ||
10712              MI.getOpcode() == PPC::SELECT_QBRC ||
10713              MI.getOpcode() == PPC::SELECT_SPE ||
10714              MI.getOpcode() == PPC::SELECT_SPE4 ||
10715              MI.getOpcode() == PPC::SELECT_VRRC ||
10716              MI.getOpcode() == PPC::SELECT_VSFRC ||
10717              MI.getOpcode() == PPC::SELECT_VSSRC ||
10718              MI.getOpcode() == PPC::SELECT_VSRC) {
10719     // The incoming instruction knows the destination vreg to set, the
10720     // condition code register to branch on, the true/false values to
10721     // select between, and a branch opcode to use.
10722 
10723     //  thisMBB:
10724     //  ...
10725     //   TrueVal = ...
10726     //   cmpTY ccX, r1, r2
10727     //   bCC copy1MBB
10728     //   fallthrough --> copy0MBB
10729     MachineBasicBlock *thisMBB = BB;
10730     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
10731     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10732     DebugLoc dl = MI.getDebugLoc();
10733     F->insert(It, copy0MBB);
10734     F->insert(It, sinkMBB);
10735 
10736     // Transfer the remainder of BB and its successor edges to sinkMBB.
10737     sinkMBB->splice(sinkMBB->begin(), BB,
10738                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
10739     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10740 
10741     // Next, add the true and fallthrough blocks as its successors.
10742     BB->addSuccessor(copy0MBB);
10743     BB->addSuccessor(sinkMBB);
10744 
10745     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
10746         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
10747         MI.getOpcode() == PPC::SELECT_F16 ||
10748         MI.getOpcode() == PPC::SELECT_SPE4 ||
10749         MI.getOpcode() == PPC::SELECT_SPE ||
10750         MI.getOpcode() == PPC::SELECT_QFRC ||
10751         MI.getOpcode() == PPC::SELECT_QSRC ||
10752         MI.getOpcode() == PPC::SELECT_QBRC ||
10753         MI.getOpcode() == PPC::SELECT_VRRC ||
10754         MI.getOpcode() == PPC::SELECT_VSFRC ||
10755         MI.getOpcode() == PPC::SELECT_VSSRC ||
10756         MI.getOpcode() == PPC::SELECT_VSRC) {
10757       BuildMI(BB, dl, TII->get(PPC::BC))
10758           .addReg(MI.getOperand(1).getReg())
10759           .addMBB(sinkMBB);
10760     } else {
10761       unsigned SelectPred = MI.getOperand(4).getImm();
10762       BuildMI(BB, dl, TII->get(PPC::BCC))
10763           .addImm(SelectPred)
10764           .addReg(MI.getOperand(1).getReg())
10765           .addMBB(sinkMBB);
10766     }
10767 
10768     //  copy0MBB:
10769     //   %FalseValue = ...
10770     //   # fallthrough to sinkMBB
10771     BB = copy0MBB;
10772 
10773     // Update machine-CFG edges
10774     BB->addSuccessor(sinkMBB);
10775 
10776     //  sinkMBB:
10777     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
10778     //  ...
10779     BB = sinkMBB;
10780     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
10781         .addReg(MI.getOperand(3).getReg())
10782         .addMBB(copy0MBB)
10783         .addReg(MI.getOperand(2).getReg())
10784         .addMBB(thisMBB);
10785   } else if (MI.getOpcode() == PPC::ReadTB) {
10786     // To read the 64-bit time-base register on a 32-bit target, we read the
10787     // two halves. Should the counter have wrapped while it was being read, we
10788     // need to try again.
10789     // ...
10790     // readLoop:
10791     // mfspr Rx,TBU # load from TBU
10792     // mfspr Ry,TB  # load from TB
10793     // mfspr Rz,TBU # load from TBU
10794     // cmpw crX,Rx,Rz # check if 'old'='new'
10795     // bne readLoop   # branch if they're not equal
10796     // ...
10797 
10798     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
10799     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10800     DebugLoc dl = MI.getDebugLoc();
10801     F->insert(It, readMBB);
10802     F->insert(It, sinkMBB);
10803 
10804     // Transfer the remainder of BB and its successor edges to sinkMBB.
10805     sinkMBB->splice(sinkMBB->begin(), BB,
10806                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
10807     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10808 
10809     BB->addSuccessor(readMBB);
10810     BB = readMBB;
10811 
10812     MachineRegisterInfo &RegInfo = F->getRegInfo();
10813     unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10814     unsigned LoReg = MI.getOperand(0).getReg();
10815     unsigned HiReg = MI.getOperand(1).getReg();
10816 
10817     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
10818     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
10819     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
10820 
10821     unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10822 
10823     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
10824         .addReg(HiReg)
10825         .addReg(ReadAgainReg);
10826     BuildMI(BB, dl, TII->get(PPC::BCC))
10827         .addImm(PPC::PRED_NE)
10828         .addReg(CmpReg)
10829         .addMBB(readMBB);
10830 
10831     BB->addSuccessor(readMBB);
10832     BB->addSuccessor(sinkMBB);
10833   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
10834     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
10835   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
10836     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
10837   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
10838     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
10839   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
10840     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
10841 
10842   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
10843     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
10844   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
10845     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
10846   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
10847     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
10848   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
10849     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
10850 
10851   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
10852     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
10853   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
10854     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
10855   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
10856     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
10857   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
10858     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
10859 
10860   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
10861     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
10862   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
10863     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
10864   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
10865     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
10866   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
10867     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
10868 
10869   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
10870     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
10871   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
10872     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
10873   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
10874     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
10875   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
10876     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
10877 
10878   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
10879     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
10880   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
10881     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
10882   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
10883     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
10884   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
10885     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
10886 
10887   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
10888     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
10889   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
10890     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
10891   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
10892     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
10893   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
10894     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
10895 
10896   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
10897     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
10898   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
10899     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
10900   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
10901     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
10902   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
10903     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
10904 
10905   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
10906     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
10907   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
10908     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
10909   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
10910     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
10911   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
10912     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
10913 
10914   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
10915     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
10916   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
10917     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
10918   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
10919     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
10920   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
10921     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
10922 
10923   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
10924     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
10925   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
10926     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
10927   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
10928     BB = EmitAtomicBinary(MI, BB, 4, 0);
10929   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
10930     BB = EmitAtomicBinary(MI, BB, 8, 0);
10931   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
10932            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
10933            (Subtarget.hasPartwordAtomics() &&
10934             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
10935            (Subtarget.hasPartwordAtomics() &&
10936             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
10937     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
10938 
10939     auto LoadMnemonic = PPC::LDARX;
10940     auto StoreMnemonic = PPC::STDCX;
10941     switch (MI.getOpcode()) {
10942     default:
10943       llvm_unreachable("Compare and swap of unknown size");
10944     case PPC::ATOMIC_CMP_SWAP_I8:
10945       LoadMnemonic = PPC::LBARX;
10946       StoreMnemonic = PPC::STBCX;
10947       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
10948       break;
10949     case PPC::ATOMIC_CMP_SWAP_I16:
10950       LoadMnemonic = PPC::LHARX;
10951       StoreMnemonic = PPC::STHCX;
10952       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
10953       break;
10954     case PPC::ATOMIC_CMP_SWAP_I32:
10955       LoadMnemonic = PPC::LWARX;
10956       StoreMnemonic = PPC::STWCX;
10957       break;
10958     case PPC::ATOMIC_CMP_SWAP_I64:
10959       LoadMnemonic = PPC::LDARX;
10960       StoreMnemonic = PPC::STDCX;
10961       break;
10962     }
10963     unsigned dest = MI.getOperand(0).getReg();
10964     unsigned ptrA = MI.getOperand(1).getReg();
10965     unsigned ptrB = MI.getOperand(2).getReg();
10966     unsigned oldval = MI.getOperand(3).getReg();
10967     unsigned newval = MI.getOperand(4).getReg();
10968     DebugLoc dl = MI.getDebugLoc();
10969 
10970     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10971     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10972     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10973     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10974     F->insert(It, loop1MBB);
10975     F->insert(It, loop2MBB);
10976     F->insert(It, midMBB);
10977     F->insert(It, exitMBB);
10978     exitMBB->splice(exitMBB->begin(), BB,
10979                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
10980     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10981 
10982     //  thisMBB:
10983     //   ...
    //   fallthrough --> loop1MBB
10985     BB->addSuccessor(loop1MBB);
10986 
    // loop1MBB:
    //   l[bhwd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[bhwd]cx. newval, ptr
    //   bne- loop1MBB
    //   b exitMBB
    // midMBB:
    //   st[bhwd]cx. dest, ptr
    // exitMBB:
10998     BB = loop1MBB;
10999     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11000     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11001         .addReg(oldval)
11002         .addReg(dest);
11003     BuildMI(BB, dl, TII->get(PPC::BCC))
11004         .addImm(PPC::PRED_NE)
11005         .addReg(PPC::CR0)
11006         .addMBB(midMBB);
11007     BB->addSuccessor(loop2MBB);
11008     BB->addSuccessor(midMBB);
11009 
11010     BB = loop2MBB;
11011     BuildMI(BB, dl, TII->get(StoreMnemonic))
11012         .addReg(newval)
11013         .addReg(ptrA)
11014         .addReg(ptrB);
11015     BuildMI(BB, dl, TII->get(PPC::BCC))
11016         .addImm(PPC::PRED_NE)
11017         .addReg(PPC::CR0)
11018         .addMBB(loop1MBB);
11019     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11020     BB->addSuccessor(loop1MBB);
11021     BB->addSuccessor(exitMBB);
11022 
11023     BB = midMBB;
11024     BuildMI(BB, dl, TII->get(StoreMnemonic))
11025         .addReg(dest)
11026         .addReg(ptrA)
11027         .addReg(ptrB);
11028     BB->addSuccessor(exitMBB);
11029 
11030     //  exitMBB:
11031     //   ...
11032     BB = exitMBB;
11033   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11034              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11035     // We must use 64-bit registers for addresses when targeting 64-bit,
11036     // since we're actually doing arithmetic on them.  Other registers
11037     // can be 32-bit.
11038     bool is64bit = Subtarget.isPPC64();
11039     bool isLittleEndian = Subtarget.isLittleEndian();
11040     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11041 
11042     unsigned dest = MI.getOperand(0).getReg();
11043     unsigned ptrA = MI.getOperand(1).getReg();
11044     unsigned ptrB = MI.getOperand(2).getReg();
11045     unsigned oldval = MI.getOperand(3).getReg();
11046     unsigned newval = MI.getOperand(4).getReg();
11047     DebugLoc dl = MI.getDebugLoc();
11048 
11049     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11050     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11051     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11052     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11053     F->insert(It, loop1MBB);
11054     F->insert(It, loop2MBB);
11055     F->insert(It, midMBB);
11056     F->insert(It, exitMBB);
11057     exitMBB->splice(exitMBB->begin(), BB,
11058                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11059     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11060 
11061     MachineRegisterInfo &RegInfo = F->getRegInfo();
11062     const TargetRegisterClass *RC =
11063         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11064     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11065 
11066     Register PtrReg = RegInfo.createVirtualRegister(RC);
11067     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11068     Register ShiftReg =
11069         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11070     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11071     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11072     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11073     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11074     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11075     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11076     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11077     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11078     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11079     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11080     Register Ptr1Reg;
11081     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11082     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11083     //  thisMBB:
11084     //   ...
    //   fallthrough --> loop1MBB
11086     BB->addSuccessor(loop1MBB);
11087 
11088     // The 4-byte load must be aligned, while a char or short may be
11089     // anywhere in the word.  Hence all this nasty bookkeeping code.
11090     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
11091     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11092     //   xori shift, shift1, 24 [16]
11093     //   rlwinm ptr, ptr1, 0, 0, 29
11094     //   slw newval2, newval, shift
11095     //   slw oldval2, oldval,shift
11096     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11097     //   slw mask, mask2, shift
11098     //   and newval3, newval2, mask
11099     //   and oldval3, oldval2, mask
11100     // loop1MBB:
11101     //   lwarx tmpDest, ptr
11102     //   and tmp, tmpDest, mask
11103     //   cmpw tmp, oldval3
11104     //   bne- midMBB
11105     // loop2MBB:
11106     //   andc tmp2, tmpDest, mask
11107     //   or tmp4, tmp2, newval3
11108     //   stwcx. tmp4, ptr
11109     //   bne- loop1MBB
11110     //   b exitBB
11111     // midMBB:
11112     //   stwcx. tmpDest, ptr
11113     // exitBB:
    //   srw dest, tmp, shift
11115     if (ptrA != ZeroReg) {
11116       Ptr1Reg = RegInfo.createVirtualRegister(RC);
11117       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11118           .addReg(ptrA)
11119           .addReg(ptrB);
11120     } else {
11121       Ptr1Reg = ptrB;
11122     }
11123 
    // We need to use the 32-bit subregister to avoid a register class mismatch
    // in 64-bit mode.
11126     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11127         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11128         .addImm(3)
11129         .addImm(27)
11130         .addImm(is8bit ? 28 : 27);
11131     if (!isLittleEndian)
11132       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11133           .addReg(Shift1Reg)
11134           .addImm(is8bit ? 24 : 16);
11135     if (is64bit)
11136       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11137           .addReg(Ptr1Reg)
11138           .addImm(0)
11139           .addImm(61);
11140     else
11141       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11142           .addReg(Ptr1Reg)
11143           .addImm(0)
11144           .addImm(0)
11145           .addImm(29);
11146     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11147         .addReg(newval)
11148         .addReg(ShiftReg);
11149     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11150         .addReg(oldval)
11151         .addReg(ShiftReg);
11152     if (is8bit)
11153       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11154     else {
11155       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11156       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11157           .addReg(Mask3Reg)
11158           .addImm(65535);
11159     }
11160     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11161         .addReg(Mask2Reg)
11162         .addReg(ShiftReg);
11163     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11164         .addReg(NewVal2Reg)
11165         .addReg(MaskReg);
11166     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11167         .addReg(OldVal2Reg)
11168         .addReg(MaskReg);
11169 
11170     BB = loop1MBB;
11171     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11172         .addReg(ZeroReg)
11173         .addReg(PtrReg);
11174     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11175         .addReg(TmpDestReg)
11176         .addReg(MaskReg);
11177     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11178         .addReg(TmpReg)
11179         .addReg(OldVal3Reg);
11180     BuildMI(BB, dl, TII->get(PPC::BCC))
11181         .addImm(PPC::PRED_NE)
11182         .addReg(PPC::CR0)
11183         .addMBB(midMBB);
11184     BB->addSuccessor(loop2MBB);
11185     BB->addSuccessor(midMBB);
11186 
11187     BB = loop2MBB;
11188     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11189         .addReg(TmpDestReg)
11190         .addReg(MaskReg);
11191     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11192         .addReg(Tmp2Reg)
11193         .addReg(NewVal3Reg);
11194     BuildMI(BB, dl, TII->get(PPC::STWCX))
11195         .addReg(Tmp4Reg)
11196         .addReg(ZeroReg)
11197         .addReg(PtrReg);
11198     BuildMI(BB, dl, TII->get(PPC::BCC))
11199         .addImm(PPC::PRED_NE)
11200         .addReg(PPC::CR0)
11201         .addMBB(loop1MBB);
11202     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11203     BB->addSuccessor(loop1MBB);
11204     BB->addSuccessor(exitMBB);
11205 
11206     BB = midMBB;
11207     BuildMI(BB, dl, TII->get(PPC::STWCX))
11208         .addReg(TmpDestReg)
11209         .addReg(ZeroReg)
11210         .addReg(PtrReg);
11211     BB->addSuccessor(exitMBB);
11212 
11213     //  exitMBB:
11214     //   ...
11215     BB = exitMBB;
11216     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11217         .addReg(TmpReg)
11218         .addReg(ShiftReg);
11219   } else if (MI.getOpcode() == PPC::FADDrtz) {
11220     // This pseudo performs an FADD with rounding mode temporarily forced
11221     // to round-to-zero.  We emit this via custom inserter since the FPSCR
11222     // is not modeled at the SelectionDAG level.
11223     unsigned Dest = MI.getOperand(0).getReg();
11224     unsigned Src1 = MI.getOperand(1).getReg();
11225     unsigned Src2 = MI.getOperand(2).getReg();
11226     DebugLoc dl = MI.getDebugLoc();
11227 
11228     MachineRegisterInfo &RegInfo = F->getRegInfo();
11229     unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11230 
11231     // Save FPSCR value.
11232     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11233 
11234     // Set rounding mode to round-to-zero.
11235     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
11236     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
11237 
11238     // Perform addition.
11239     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
11240 
11241     // Restore FPSCR value.
11242     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11243   } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11244              MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
11245              MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11246              MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
11247     unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11248                        MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
11249                           ? PPC::ANDIo8
11250                           : PPC::ANDIo;
11251     bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11252                  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
11253 
11254     MachineRegisterInfo &RegInfo = F->getRegInfo();
11255     unsigned Dest = RegInfo.createVirtualRegister(
11256         Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11257 
11258     DebugLoc dl = MI.getDebugLoc();
11259     BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
11260         .addReg(MI.getOperand(1).getReg())
11261         .addImm(1);
11262     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
11263             MI.getOperand(0).getReg())
11264         .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
11265   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
11266     DebugLoc Dl = MI.getDebugLoc();
11267     MachineRegisterInfo &RegInfo = F->getRegInfo();
11268     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11269     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
11270     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11271             MI.getOperand(0).getReg())
11272         .addReg(CRReg);
11273   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
11274     DebugLoc Dl = MI.getDebugLoc();
11275     unsigned Imm = MI.getOperand(1).getImm();
11276     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
11277     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11278             MI.getOperand(0).getReg())
11279         .addReg(PPC::CR0EQ);
11280   } else if (MI.getOpcode() == PPC::SETRNDi) {
11281     DebugLoc dl = MI.getDebugLoc();
11282     unsigned OldFPSCRReg = MI.getOperand(0).getReg();
11283 
11284     // Save FPSCR value.
11285     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11286 
    // The floating point rounding mode is in the bits 62:63 of FPSCR, and has
11288     // the following settings:
11289     //   00 Round to nearest
11290     //   01 Round to 0
11291     //   10 Round to +inf
11292     //   11 Round to -inf
11293 
    // When the operand is an immediate, use the two least significant bits of
    // the immediate to set the bits 62:63 of FPSCR.
11296     unsigned Mode = MI.getOperand(1).getImm();
11297     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
11298       .addImm(31);
11299 
11300     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
11301       .addImm(30);
11302   } else if (MI.getOpcode() == PPC::SETRND) {
11303     DebugLoc dl = MI.getDebugLoc();
11304 
11305     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
11306     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
11307     // If the target doesn't have DirectMove, we should use stack to do the
11308     // conversion, because the target doesn't have the instructions like mtvsrd
11309     // or mfvsrd to do this conversion directly.
11310     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
11311       if (Subtarget.hasDirectMove()) {
11312         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
11313           .addReg(SrcReg);
11314       } else {
11315         // Use stack to do the register copy.
11316         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
11317         MachineRegisterInfo &RegInfo = F->getRegInfo();
11318         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
11319         if (RC == &PPC::F8RCRegClass) {
11320           // Copy register from F8RCRegClass to G8RCRegclass.
11321           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
11322                  "Unsupported RegClass.");
11323 
11324           StoreOp = PPC::STFD;
11325           LoadOp = PPC::LD;
11326         } else {
11327           // Copy register from G8RCRegClass to F8RCRegclass.
11328           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
11329                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
11330                  "Unsupported RegClass.");
11331         }
11332 
11333         MachineFrameInfo &MFI = F->getFrameInfo();
11334         int FrameIdx = MFI.CreateStackObject(8, 8, false);
11335 
11336         MachineMemOperand *MMOStore = F->getMachineMemOperand(
11337           MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11338           MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
11339           MFI.getObjectAlignment(FrameIdx));
11340 
11341         // Store the SrcReg into the stack.
11342         BuildMI(*BB, MI, dl, TII->get(StoreOp))
11343           .addReg(SrcReg)
11344           .addImm(0)
11345           .addFrameIndex(FrameIdx)
11346           .addMemOperand(MMOStore);
11347 
11348         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
11349           MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11350           MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
11351           MFI.getObjectAlignment(FrameIdx));
11352 
11353         // Load from the stack where SrcReg is stored, and save to DestReg,
11354         // so we have done the RegClass conversion from RegClass::SrcReg to
11355         // RegClass::DestReg.
11356         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
11357           .addImm(0)
11358           .addFrameIndex(FrameIdx)
11359           .addMemOperand(MMOLoad);
11360       }
11361     };
11362 
11363     unsigned OldFPSCRReg = MI.getOperand(0).getReg();
11364 
11365     // Save FPSCR value.
11366     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11367 
11368     // When the operand is gprc register, use two least significant bits of the
11369     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
11370     //
11371     // copy OldFPSCRTmpReg, OldFPSCRReg
11372     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
11373     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
11374     // copy NewFPSCRReg, NewFPSCRTmpReg
11375     // mtfsf 255, NewFPSCRReg
11376     MachineOperand SrcOp = MI.getOperand(1);
11377     MachineRegisterInfo &RegInfo = F->getRegInfo();
11378     unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11379 
11380     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
11381 
11382     unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11383     unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11384 
11385     // The first operand of INSERT_SUBREG should be a register which has
11386     // subregisters, we only care about its RegClass, so we should use an
11387     // IMPLICIT_DEF register.
11388     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
11389     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
11390       .addReg(ImDefReg)
11391       .add(SrcOp)
11392       .addImm(1);
11393 
11394     unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11395     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
11396       .addReg(OldFPSCRTmpReg)
11397       .addReg(ExtSrcReg)
11398       .addImm(0)
11399       .addImm(62);
11400 
11401     unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11402     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
11403 
    // A field mask of 255 selects all eight 4-bit fields, so bits 32:63 of
    // NewFPSCRReg are copied into bits 32:63 of FPSCR.
11406     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
11407       .addImm(255)
11408       .addReg(NewFPSCRReg)
11409       .addImm(0)
11410       .addImm(0);
11411   } else {
11412     llvm_unreachable("Unexpected instr type to insert");
11413   }
11414 
11415   MI.eraseFromParent(); // The pseudo instruction is gone now.
11416   return BB;
11417 }
11418 
11419 //===----------------------------------------------------------------------===//
11420 // Target Optimization Hooks
11421 //===----------------------------------------------------------------------===//
11422 
11423 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
11424   // For the estimates, convergence is quadratic, so we essentially double the
11425   // number of digits correct after every iteration. For both FRE and FRSQRTE,
11426   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
11427   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
11428   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
11429   if (VT.getScalarType() == MVT::f64)
11430     RefinementSteps++;
11431   return RefinementSteps;
11432 }
11433 
11434 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
11435                                            int Enabled, int &RefinementSteps,
11436                                            bool &UseOneConstNR,
11437                                            bool Reciprocal) const {
11438   EVT VT = Operand.getValueType();
11439   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
11440       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
11441       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11442       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11443       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11444       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11445     if (RefinementSteps == ReciprocalEstimate::Unspecified)
11446       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11447 
11448     // The Newton-Raphson computation with a single constant does not provide
11449     // enough accuracy on some CPUs.
11450     UseOneConstNR = !Subtarget.needsTwoConstNR();
11451     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
11452   }
11453   return SDValue();
11454 }
11455 
11456 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
11457                                             int Enabled,
11458                                             int &RefinementSteps) const {
11459   EVT VT = Operand.getValueType();
11460   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
11461       (VT == MVT::f64 && Subtarget.hasFRE()) ||
11462       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11463       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11464       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11465       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11466     if (RefinementSteps == ReciprocalEstimate::Unspecified)
11467       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11468     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
11469   }
11470   return SDValue();
11471 }
11472 
11473 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
11474   // Note: This functionality is used only when unsafe-fp-math is enabled, and
11475   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
11476   // enabled for division), this functionality is redundant with the default
11477   // combiner logic (once the division -> reciprocal/multiply transformation
11478   // has taken place). As a result, this matters more for older cores than for
11479   // newer ones.
11480 
11481   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11482   // reciprocal if there are two or more FDIVs (for embedded cores with only
11483   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
11484   switch (Subtarget.getDarwinDirective()) {
11485   default:
11486     return 3;
11487   case PPC::DIR_440:
11488   case PPC::DIR_A2:
11489   case PPC::DIR_E500:
11490   case PPC::DIR_E500mc:
11491   case PPC::DIR_E5500:
11492     return 2;
11493   }
11494 }
11495 
11496 // isConsecutiveLSLoc needs to work even if all adds have not yet been
11497 // collapsed, and so we need to look through chains of them.
11498 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
11499                                      int64_t& Offset, SelectionDAG &DAG) {
11500   if (DAG.isBaseWithConstantOffset(Loc)) {
11501     Base = Loc.getOperand(0);
11502     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
11503 
11504     // The base might itself be a base plus an offset, and if so, accumulate
11505     // that as well.
11506     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
11507   }
11508 }
11509 
11510 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
11511                             unsigned Bytes, int Dist,
11512                             SelectionDAG &DAG) {
11513   if (VT.getSizeInBits() / 8 != Bytes)
11514     return false;
11515 
11516   SDValue BaseLoc = Base->getBasePtr();
11517   if (Loc.getOpcode() == ISD::FrameIndex) {
11518     if (BaseLoc.getOpcode() != ISD::FrameIndex)
11519       return false;
11520     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11521     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
11522     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
11523     int FS  = MFI.getObjectSize(FI);
11524     int BFS = MFI.getObjectSize(BFI);
11525     if (FS != BFS || FS != (int)Bytes) return false;
11526     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
11527   }
11528 
11529   SDValue Base1 = Loc, Base2 = BaseLoc;
11530   int64_t Offset1 = 0, Offset2 = 0;
11531   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
11532   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
11533   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
11534     return true;
11535 
11536   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11537   const GlobalValue *GV1 = nullptr;
11538   const GlobalValue *GV2 = nullptr;
11539   Offset1 = 0;
11540   Offset2 = 0;
11541   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
11542   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
11543   if (isGA1 && isGA2 && GV1 == GV2)
11544     return Offset1 == (Offset2 + Dist*Bytes);
11545   return false;
11546 }
11547 
11548 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
11549 // not enforce equality of the chain operands.
11550 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
11551                             unsigned Bytes, int Dist,
11552                             SelectionDAG &DAG) {
11553   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
11554     EVT VT = LS->getMemoryVT();
11555     SDValue Loc = LS->getBasePtr();
11556     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
11557   }
11558 
11559   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
11560     EVT VT;
11561     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11562     default: return false;
11563     case Intrinsic::ppc_qpx_qvlfd:
11564     case Intrinsic::ppc_qpx_qvlfda:
11565       VT = MVT::v4f64;
11566       break;
11567     case Intrinsic::ppc_qpx_qvlfs:
11568     case Intrinsic::ppc_qpx_qvlfsa:
11569       VT = MVT::v4f32;
11570       break;
11571     case Intrinsic::ppc_qpx_qvlfcd:
11572     case Intrinsic::ppc_qpx_qvlfcda:
11573       VT = MVT::v2f64;
11574       break;
11575     case Intrinsic::ppc_qpx_qvlfcs:
11576     case Intrinsic::ppc_qpx_qvlfcsa:
11577       VT = MVT::v2f32;
11578       break;
11579     case Intrinsic::ppc_qpx_qvlfiwa:
11580     case Intrinsic::ppc_qpx_qvlfiwz:
11581     case Intrinsic::ppc_altivec_lvx:
11582     case Intrinsic::ppc_altivec_lvxl:
11583     case Intrinsic::ppc_vsx_lxvw4x:
11584     case Intrinsic::ppc_vsx_lxvw4x_be:
11585       VT = MVT::v4i32;
11586       break;
11587     case Intrinsic::ppc_vsx_lxvd2x:
11588     case Intrinsic::ppc_vsx_lxvd2x_be:
11589       VT = MVT::v2f64;
11590       break;
11591     case Intrinsic::ppc_altivec_lvebx:
11592       VT = MVT::i8;
11593       break;
11594     case Intrinsic::ppc_altivec_lvehx:
11595       VT = MVT::i16;
11596       break;
11597     case Intrinsic::ppc_altivec_lvewx:
11598       VT = MVT::i32;
11599       break;
11600     }
11601 
11602     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
11603   }
11604 
11605   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
11606     EVT VT;
11607     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11608     default: return false;
11609     case Intrinsic::ppc_qpx_qvstfd:
11610     case Intrinsic::ppc_qpx_qvstfda:
11611       VT = MVT::v4f64;
11612       break;
11613     case Intrinsic::ppc_qpx_qvstfs:
11614     case Intrinsic::ppc_qpx_qvstfsa:
11615       VT = MVT::v4f32;
11616       break;
11617     case Intrinsic::ppc_qpx_qvstfcd:
11618     case Intrinsic::ppc_qpx_qvstfcda:
11619       VT = MVT::v2f64;
11620       break;
11621     case Intrinsic::ppc_qpx_qvstfcs:
11622     case Intrinsic::ppc_qpx_qvstfcsa:
11623       VT = MVT::v2f32;
11624       break;
11625     case Intrinsic::ppc_qpx_qvstfiw:
11626     case Intrinsic::ppc_qpx_qvstfiwa:
11627     case Intrinsic::ppc_altivec_stvx:
11628     case Intrinsic::ppc_altivec_stvxl:
11629     case Intrinsic::ppc_vsx_stxvw4x:
11630       VT = MVT::v4i32;
11631       break;
11632     case Intrinsic::ppc_vsx_stxvd2x:
11633       VT = MVT::v2f64;
11634       break;
11635     case Intrinsic::ppc_vsx_stxvw4x_be:
11636       VT = MVT::v4i32;
11637       break;
11638     case Intrinsic::ppc_vsx_stxvd2x_be:
11639       VT = MVT::v2f64;
11640       break;
11641     case Intrinsic::ppc_altivec_stvebx:
11642       VT = MVT::i8;
11643       break;
11644     case Intrinsic::ppc_altivec_stvehx:
11645       VT = MVT::i16;
11646       break;
11647     case Intrinsic::ppc_altivec_stvewx:
11648       VT = MVT::i32;
11649       break;
11650     }
11651 
11652     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
11653   }
11654 
11655   return false;
11656 }
11657 
11658 // Return true is there is a nearyby consecutive load to the one provided
11659 // (regardless of alignment). We search up and down the chain, looking though
11660 // token factors and other loads (but nothing else). As a result, a true result
11661 // indicates that it is safe to create a new consecutive load adjacent to the
11662 // load provided.
11663 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
11664   SDValue Chain = LD->getChain();
11665   EVT VT = LD->getMemoryVT();
11666 
11667   SmallSet<SDNode *, 16> LoadRoots;
11668   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
11669   SmallSet<SDNode *, 16> Visited;
11670 
11671   // First, search up the chain, branching to follow all token-factor operands.
11672   // If we find a consecutive load, then we're done, otherwise, record all
11673   // nodes just above the top-level loads and token factors.
11674   while (!Queue.empty()) {
11675     SDNode *ChainNext = Queue.pop_back_val();
11676     if (!Visited.insert(ChainNext).second)
11677       continue;
11678 
11679     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
11680       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11681         return true;
11682 
11683       if (!Visited.count(ChainLD->getChain().getNode()))
11684         Queue.push_back(ChainLD->getChain().getNode());
11685     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
11686       for (const SDUse &O : ChainNext->ops())
11687         if (!Visited.count(O.getNode()))
11688           Queue.push_back(O.getNode());
11689     } else
11690       LoadRoots.insert(ChainNext);
11691   }
11692 
11693   // Second, search down the chain, starting from the top-level nodes recorded
11694   // in the first phase. These top-level nodes are the nodes just above all
11695   // loads and token factors. Starting with their uses, recursively look though
11696   // all loads (just the chain uses) and token factors to find a consecutive
11697   // load.
11698   Visited.clear();
11699   Queue.clear();
11700 
11701   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
11702        IE = LoadRoots.end(); I != IE; ++I) {
11703     Queue.push_back(*I);
11704 
11705     while (!Queue.empty()) {
11706       SDNode *LoadRoot = Queue.pop_back_val();
11707       if (!Visited.insert(LoadRoot).second)
11708         continue;
11709 
11710       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
11711         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11712           return true;
11713 
11714       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
11715            UE = LoadRoot->use_end(); UI != UE; ++UI)
11716         if (((isa<MemSDNode>(*UI) &&
11717             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
11718             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
11719           Queue.push_back(*UI);
11720     }
11721   }
11722 
11723   return false;
11724 }
11725 
11726 /// This function is called when we have proved that a SETCC node can be replaced
11727 /// by subtraction (and other supporting instructions) so that the result of
11728 /// comparison is kept in a GPR instead of CR. This function is purely for
11729 /// codegen purposes and has some flags to guide the codegen process.
11730 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
11731                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
11732   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11733 
11734   // Zero extend the operands to the largest legal integer. Originally, they
11735   // must be of a strictly smaller size.
11736   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
11737                          DAG.getConstant(Size, DL, MVT::i32));
11738   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
11739                          DAG.getConstant(Size, DL, MVT::i32));
11740 
11741   // Swap if needed. Depends on the condition code.
11742   if (Swap)
11743     std::swap(Op0, Op1);
11744 
11745   // Subtract extended integers.
11746   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
11747 
11748   // Move the sign bit to the least significant position and zero out the rest.
11749   // Now the least significant bit carries the result of original comparison.
11750   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
11751                              DAG.getConstant(Size - 1, DL, MVT::i32));
11752   auto Final = Shifted;
11753 
11754   // Complement the result if needed. Based on the condition code.
11755   if (Complement)
11756     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
11757                         DAG.getConstant(1, DL, MVT::i64));
11758 
11759   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
11760 }
11761 
11762 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
11763                                                   DAGCombinerInfo &DCI) const {
11764   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11765 
11766   SelectionDAG &DAG = DCI.DAG;
11767   SDLoc DL(N);
11768 
11769   // Size of integers being compared has a critical role in the following
11770   // analysis, so we prefer to do this when all types are legal.
11771   if (!DCI.isAfterLegalizeDAG())
11772     return SDValue();
11773 
11774   // If all users of SETCC extend its value to a legal integer type
11775   // then we replace SETCC with a subtraction
11776   for (SDNode::use_iterator UI = N->use_begin(),
11777        UE = N->use_end(); UI != UE; ++UI) {
11778     if (UI->getOpcode() != ISD::ZERO_EXTEND)
11779       return SDValue();
11780   }
11781 
11782   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
11783   auto OpSize = N->getOperand(0).getValueSizeInBits();
11784 
11785   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
11786 
11787   if (OpSize < Size) {
11788     switch (CC) {
11789     default: break;
11790     case ISD::SETULT:
11791       return generateEquivalentSub(N, Size, false, false, DL, DAG);
11792     case ISD::SETULE:
11793       return generateEquivalentSub(N, Size, true, true, DL, DAG);
11794     case ISD::SETUGT:
11795       return generateEquivalentSub(N, Size, false, true, DL, DAG);
11796     case ISD::SETUGE:
11797       return generateEquivalentSub(N, Size, true, false, DL, DAG);
11798     }
11799   }
11800 
11801   return SDValue();
11802 }
11803 
11804 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
11805                                                   DAGCombinerInfo &DCI) const {
11806   SelectionDAG &DAG = DCI.DAG;
11807   SDLoc dl(N);
11808 
11809   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
11810   // If we're tracking CR bits, we need to be careful that we don't have:
11811   //   trunc(binary-ops(zext(x), zext(y)))
11812   // or
11813   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
11814   // such that we're unnecessarily moving things into GPRs when it would be
11815   // better to keep them in CR bits.
11816 
11817   // Note that trunc here can be an actual i1 trunc, or can be the effective
11818   // truncation that comes from a setcc or select_cc.
11819   if (N->getOpcode() == ISD::TRUNCATE &&
11820       N->getValueType(0) != MVT::i1)
11821     return SDValue();
11822 
11823   if (N->getOperand(0).getValueType() != MVT::i32 &&
11824       N->getOperand(0).getValueType() != MVT::i64)
11825     return SDValue();
11826 
11827   if (N->getOpcode() == ISD::SETCC ||
11828       N->getOpcode() == ISD::SELECT_CC) {
11829     // If we're looking at a comparison, then we need to make sure that the
11830     // high bits (all except for the first) don't matter the result.
11831     ISD::CondCode CC =
11832       cast<CondCodeSDNode>(N->getOperand(
11833         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
11834     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
11835 
11836     if (ISD::isSignedIntSetCC(CC)) {
11837       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
11838           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
11839         return SDValue();
11840     } else if (ISD::isUnsignedIntSetCC(CC)) {
11841       if (!DAG.MaskedValueIsZero(N->getOperand(0),
11842                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
11843           !DAG.MaskedValueIsZero(N->getOperand(1),
11844                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
11845         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
11846                                              : SDValue());
11847     } else {
11848       // This is neither a signed nor an unsigned comparison, just make sure
11849       // that the high bits are equal.
11850       KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
11851       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
11852 
11853       // We don't really care about what is known about the first bit (if
11854       // anything), so clear it in all masks prior to comparing them.
11855       Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
11856       Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
11857 
11858       if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
11859         return SDValue();
11860     }
11861   }
11862 
11863   // We now know that the higher-order bits are irrelevant, we just need to
11864   // make sure that all of the intermediate operations are bit operations, and
11865   // all inputs are extensions.
11866   if (N->getOperand(0).getOpcode() != ISD::AND &&
11867       N->getOperand(0).getOpcode() != ISD::OR  &&
11868       N->getOperand(0).getOpcode() != ISD::XOR &&
11869       N->getOperand(0).getOpcode() != ISD::SELECT &&
11870       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
11871       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
11872       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
11873       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
11874       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
11875     return SDValue();
11876 
11877   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
11878       N->getOperand(1).getOpcode() != ISD::AND &&
11879       N->getOperand(1).getOpcode() != ISD::OR  &&
11880       N->getOperand(1).getOpcode() != ISD::XOR &&
11881       N->getOperand(1).getOpcode() != ISD::SELECT &&
11882       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
11883       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
11884       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
11885       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
11886       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
11887     return SDValue();
11888 
11889   SmallVector<SDValue, 4> Inputs;
11890   SmallVector<SDValue, 8> BinOps, PromOps;
11891   SmallPtrSet<SDNode *, 16> Visited;
11892 
11893   for (unsigned i = 0; i < 2; ++i) {
11894     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11895           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11896           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
11897           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
11898         isa<ConstantSDNode>(N->getOperand(i)))
11899       Inputs.push_back(N->getOperand(i));
11900     else
11901       BinOps.push_back(N->getOperand(i));
11902 
11903     if (N->getOpcode() == ISD::TRUNCATE)
11904       break;
11905   }
11906 
11907   // Visit all inputs, collect all binary operations (and, or, xor and
11908   // select) that are all fed by extensions.
11909   while (!BinOps.empty()) {
11910     SDValue BinOp = BinOps.back();
11911     BinOps.pop_back();
11912 
11913     if (!Visited.insert(BinOp.getNode()).second)
11914       continue;
11915 
11916     PromOps.push_back(BinOp);
11917 
11918     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
11919       // The condition of the select is not promoted.
11920       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
11921         continue;
11922       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
11923         continue;
11924 
11925       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11926             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11927             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
11928            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
11929           isa<ConstantSDNode>(BinOp.getOperand(i))) {
11930         Inputs.push_back(BinOp.getOperand(i));
11931       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
11932                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
11933                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
11934                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
11935                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
11936                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
11937                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11938                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11939                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
11940         BinOps.push_back(BinOp.getOperand(i));
11941       } else {
11942         // We have an input that is not an extension or another binary
11943         // operation; we'll abort this transformation.
11944         return SDValue();
11945       }
11946     }
11947   }
11948 
11949   // Make sure that this is a self-contained cluster of operations (which
11950   // is not quite the same thing as saying that everything has only one
11951   // use).
11952   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11953     if (isa<ConstantSDNode>(Inputs[i]))
11954       continue;
11955 
11956     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
11957                               UE = Inputs[i].getNode()->use_end();
11958          UI != UE; ++UI) {
11959       SDNode *User = *UI;
11960       if (User != N && !Visited.count(User))
11961         return SDValue();
11962 
11963       // Make sure that we're not going to promote the non-output-value
11964       // operand(s) or SELECT or SELECT_CC.
11965       // FIXME: Although we could sometimes handle this, and it does occur in
11966       // practice that one of the condition inputs to the select is also one of
11967       // the outputs, we currently can't deal with this.
11968       if (User->getOpcode() == ISD::SELECT) {
11969         if (User->getOperand(0) == Inputs[i])
11970           return SDValue();
11971       } else if (User->getOpcode() == ISD::SELECT_CC) {
11972         if (User->getOperand(0) == Inputs[i] ||
11973             User->getOperand(1) == Inputs[i])
11974           return SDValue();
11975       }
11976     }
11977   }
11978 
11979   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
11980     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
11981                               UE = PromOps[i].getNode()->use_end();
11982          UI != UE; ++UI) {
11983       SDNode *User = *UI;
11984       if (User != N && !Visited.count(User))
11985         return SDValue();
11986 
11987       // Make sure that we're not going to promote the non-output-value
11988       // operand(s) or SELECT or SELECT_CC.
11989       // FIXME: Although we could sometimes handle this, and it does occur in
11990       // practice that one of the condition inputs to the select is also one of
11991       // the outputs, we currently can't deal with this.
11992       if (User->getOpcode() == ISD::SELECT) {
11993         if (User->getOperand(0) == PromOps[i])
11994           return SDValue();
11995       } else if (User->getOpcode() == ISD::SELECT_CC) {
11996         if (User->getOperand(0) == PromOps[i] ||
11997             User->getOperand(1) == PromOps[i])
11998           return SDValue();
11999       }
12000     }
12001   }
12002 
12003   // Replace all inputs with the extension operand.
12004   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12005     // Constants may have users outside the cluster of to-be-promoted nodes,
12006     // and so we need to replace those as we do the promotions.
12007     if (isa<ConstantSDNode>(Inputs[i]))
12008       continue;
12009     else
12010       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
12011   }
12012 
12013   std::list<HandleSDNode> PromOpHandles;
12014   for (auto &PromOp : PromOps)
12015     PromOpHandles.emplace_back(PromOp);
12016 
12017   // Replace all operations (these are all the same, but have a different
12018   // (i1) return type). DAG.getNode will validate that the types of
12019   // a binary operator match, so go through the list in reverse so that
12020   // we've likely promoted both operands first. Any intermediate truncations or
12021   // extensions disappear.
12022   while (!PromOpHandles.empty()) {
12023     SDValue PromOp = PromOpHandles.back().getValue();
12024     PromOpHandles.pop_back();
12025 
12026     if (PromOp.getOpcode() == ISD::TRUNCATE ||
12027         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
12028         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
12029         PromOp.getOpcode() == ISD::ANY_EXTEND) {
12030       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
12031           PromOp.getOperand(0).getValueType() != MVT::i1) {
12032         // The operand is not yet ready (see comment below).
12033         PromOpHandles.emplace_front(PromOp);
12034         continue;
12035       }
12036 
12037       SDValue RepValue = PromOp.getOperand(0);
12038       if (isa<ConstantSDNode>(RepValue))
12039         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
12040 
12041       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
12042       continue;
12043     }
12044 
12045     unsigned C;
12046     switch (PromOp.getOpcode()) {
12047     default:             C = 0; break;
12048     case ISD::SELECT:    C = 1; break;
12049     case ISD::SELECT_CC: C = 2; break;
12050     }
12051 
12052     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12053          PromOp.getOperand(C).getValueType() != MVT::i1) ||
12054         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12055          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
12056       // The to-be-promoted operands of this node have not yet been
12057       // promoted (this should be rare because we're going through the
12058       // list backward, but if one of the operands has several users in
12059       // this cluster of to-be-promoted nodes, it is possible).
12060       PromOpHandles.emplace_front(PromOp);
12061       continue;
12062     }
12063 
12064     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12065                                 PromOp.getNode()->op_end());
12066 
12067     // If there are any constant inputs, make sure they're replaced now.
12068     for (unsigned i = 0; i < 2; ++i)
12069       if (isa<ConstantSDNode>(Ops[C+i]))
12070         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
12071 
12072     DAG.ReplaceAllUsesOfValueWith(PromOp,
12073       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
12074   }
12075 
12076   // Now we're left with the initial truncation itself.
12077   if (N->getOpcode() == ISD::TRUNCATE)
12078     return N->getOperand(0);
12079 
12080   // Otherwise, this is a comparison. The operands to be compared have just
12081   // changed type (to i1), but everything else is the same.
12082   return SDValue(N, 0);
12083 }
12084 
/// Try to remove an extension whose operand is a cluster of logical/select
/// operations fed only by truncations (or constants), by re-creating the whole
/// cluster directly in the extended type.
///
/// \p N is the extension node being combined (its opcode is compared against
/// ANY_EXTEND / ZERO_EXTEND / SIGN_EXTEND below). Its result type must be i32
/// or i64, and its operand must be an AND, OR, XOR, SELECT or SELECT_CC that
/// produces either i1 (when the subtarget uses CR bits) or i32 (on PPC64).
///
/// \returns the value that replaces \p N, or an empty SDValue when the pattern
/// does not match or the cluster is not self-contained.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  // Only extensions producing i32 or i64 are handled.
  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  // The extended value must be an i1 (with CR-bit tracking enabled) or an i32
  // (on a 64-bit subtarget).
  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  // The root of the cluster must be one of the operations we know how to
  // promote.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  // Inputs:  the leaves of the cluster (truncations and constants).
  // BinOps:  worklist of operations still to be visited.
  // PromOps: every operation in the cluster, in visitation order.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      // For SELECT_CC only the two value operands (2 and 3) are promoted.
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  // Index 0 tracks operand 0 and index 1 tracks operand 1 of the user; the
  // mapped EVT is the operand's original (pre-promotion) type.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants are not replaced in place (see below), so their other users
    // do not matter.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      // Any user outside of the cluster (other than N itself) blocks the
      // transformation.
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  // Apply the same self-containment check (and select-operand bookkeeping) to
  // every operation inside the cluster.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  // PromBits is the width of the value being extended (before promotion).
  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      // For a zero extension the bits above PromBits of the truncation source
      // must be known zero; for a sign extension the source must have enough
      // known sign bits. Otherwise an explicit extension is still required.
      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Hold each to-be-promoted operation through a HandleSDNode rather than a
  // plain SDValue while the DAG is being rewritten below; the current value is
  // re-read from the handle each time one is processed.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first promoted value operand: 0 for the binary
    // operations, 1 for SELECT and 2 for SELECT_CC. The second one is C+1.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      // Use the same kind of extension as N itself.
      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself. After the replacements
  // above, N's operand already has N's result type, so if no explicit
  // extension is needed it can stand in for N directly.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  // To sign extend, shift the low PromBits up to the top of the register and
  // arithmetically shift them back down.
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}
12367 
12368 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
12369                                         DAGCombinerInfo &DCI) const {
12370   assert(N->getOpcode() == ISD::SETCC &&
12371          "Should be called with a SETCC node");
12372 
12373   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12374   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
12375     SDValue LHS = N->getOperand(0);
12376     SDValue RHS = N->getOperand(1);
12377 
12378     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
12379     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
12380         LHS.hasOneUse())
12381       std::swap(LHS, RHS);
12382 
12383     // x == 0-y --> x+y == 0
12384     // x != 0-y --> x+y != 0
12385     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
12386         RHS.hasOneUse()) {
12387       SDLoc DL(N);
12388       SelectionDAG &DAG = DCI.DAG;
12389       EVT VT = N->getValueType(0);
12390       EVT OpVT = LHS.getValueType();
12391       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
12392       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
12393     }
12394   }
12395 
12396   return DAGCombineTruncBoolExt(N, DCI);
12397 }
12398 
12399 // Is this an extending load from an f32 to an f64?
12400 static bool isFPExtLoad(SDValue Op) {
12401   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
12402     return LD->getExtensionType() == ISD::EXTLOAD &&
12403       Op.getValueType() == MVT::f64;
12404   return false;
12405 }
12406 
12407 /// Reduces the number of fp-to-int conversion when building a vector.
12408 ///
12409 /// If this vector is built out of floating to integer conversions,
12410 /// transform it to a vector built out of floating point values followed by a
12411 /// single floating to integer conversion of the vector.
12412 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
12413 /// becomes (fptosi (build_vector ($A, $B, ...)))
12414 SDValue PPCTargetLowering::
12415 combineElementTruncationToVectorTruncation(SDNode *N,
12416                                            DAGCombinerInfo &DCI) const {
12417   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12418          "Should be called with a BUILD_VECTOR node");
12419 
12420   SelectionDAG &DAG = DCI.DAG;
12421   SDLoc dl(N);
12422 
12423   SDValue FirstInput = N->getOperand(0);
12424   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
12425          "The input operand must be an fp-to-int conversion.");
12426 
12427   // This combine happens after legalization so the fp_to_[su]i nodes are
12428   // already converted to PPCSISD nodes.
12429   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
12430   if (FirstConversion == PPCISD::FCTIDZ ||
12431       FirstConversion == PPCISD::FCTIDUZ ||
12432       FirstConversion == PPCISD::FCTIWZ ||
12433       FirstConversion == PPCISD::FCTIWUZ) {
12434     bool IsSplat = true;
12435     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
12436       FirstConversion == PPCISD::FCTIWUZ;
12437     EVT SrcVT = FirstInput.getOperand(0).getValueType();
12438     SmallVector<SDValue, 4> Ops;
12439     EVT TargetVT = N->getValueType(0);
12440     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12441       SDValue NextOp = N->getOperand(i);
12442       if (NextOp.getOpcode() != PPCISD::MFVSR)
12443         return SDValue();
12444       unsigned NextConversion = NextOp.getOperand(0).getOpcode();
12445       if (NextConversion != FirstConversion)
12446         return SDValue();
12447       // If we are converting to 32-bit integers, we need to add an FP_ROUND.
12448       // This is not valid if the input was originally double precision. It is
12449       // also not profitable to do unless this is an extending load in which
12450       // case doing this combine will allow us to combine consecutive loads.
12451       if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
12452         return SDValue();
12453       if (N->getOperand(i) != FirstInput)
12454         IsSplat = false;
12455     }
12456 
12457     // If this is a splat, we leave it as-is since there will be only a single
12458     // fp-to-int conversion followed by a splat of the integer. This is better
12459     // for 32-bit and smaller ints and neutral for 64-bit ints.
12460     if (IsSplat)
12461       return SDValue();
12462 
12463     // Now that we know we have the right type of node, get its operands
12464     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12465       SDValue In = N->getOperand(i).getOperand(0);
12466       if (Is32Bit) {
12467         // For 32-bit values, we need to add an FP_ROUND node (if we made it
12468         // here, we know that all inputs are extending loads so this is safe).
12469         if (In.isUndef())
12470           Ops.push_back(DAG.getUNDEF(SrcVT));
12471         else {
12472           SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
12473                                       MVT::f32, In.getOperand(0),
12474                                       DAG.getIntPtrConstant(1, dl));
12475           Ops.push_back(Trunc);
12476         }
12477       } else
12478         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
12479     }
12480 
12481     unsigned Opcode;
12482     if (FirstConversion == PPCISD::FCTIDZ ||
12483         FirstConversion == PPCISD::FCTIWZ)
12484       Opcode = ISD::FP_TO_SINT;
12485     else
12486       Opcode = ISD::FP_TO_UINT;
12487 
12488     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
12489     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
12490     return DAG.getNode(Opcode, dl, TargetVT, BV);
12491   }
12492   return SDValue();
12493 }
12494 
12495 /// Reduce the number of loads when building a vector.
12496 ///
12497 /// Building a vector out of multiple loads can be converted to a load
12498 /// of the vector type if the loads are consecutive. If the loads are
12499 /// consecutive but in descending order, a shuffle is added at the end
12500 /// to reorder the vector.
12501 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
12502   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12503          "Should be called with a BUILD_VECTOR node");
12504 
12505   SDLoc dl(N);
12506 
12507   // Return early for non byte-sized type, as they can't be consecutive.
12508   if (!N->getValueType(0).getVectorElementType().isByteSized())
12509     return SDValue();
12510 
12511   bool InputsAreConsecutiveLoads = true;
12512   bool InputsAreReverseConsecutive = true;
12513   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
12514   SDValue FirstInput = N->getOperand(0);
12515   bool IsRoundOfExtLoad = false;
12516 
12517   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
12518       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
12519     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
12520     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
12521   }
12522   // Not a build vector of (possibly fp_rounded) loads.
12523   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
12524       N->getNumOperands() == 1)
12525     return SDValue();
12526 
12527   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
12528     // If any inputs are fp_round(extload), they all must be.
12529     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
12530       return SDValue();
12531 
12532     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
12533       N->getOperand(i);
12534     if (NextInput.getOpcode() != ISD::LOAD)
12535       return SDValue();
12536 
12537     SDValue PreviousInput =
12538       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
12539     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
12540     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
12541 
12542     // If any inputs are fp_round(extload), they all must be.
12543     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
12544       return SDValue();
12545 
12546     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
12547       InputsAreConsecutiveLoads = false;
12548     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
12549       InputsAreReverseConsecutive = false;
12550 
12551     // Exit early if the loads are neither consecutive nor reverse consecutive.
12552     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
12553       return SDValue();
12554   }
12555 
12556   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
12557          "The loads cannot be both consecutive and reverse consecutive.");
12558 
12559   SDValue FirstLoadOp =
12560     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
12561   SDValue LastLoadOp =
12562     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
12563                        N->getOperand(N->getNumOperands()-1);
12564 
12565   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
12566   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
12567   if (InputsAreConsecutiveLoads) {
12568     assert(LD1 && "Input needs to be a LoadSDNode.");
12569     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
12570                        LD1->getBasePtr(), LD1->getPointerInfo(),
12571                        LD1->getAlignment());
12572   }
12573   if (InputsAreReverseConsecutive) {
12574     assert(LDL && "Input needs to be a LoadSDNode.");
12575     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
12576                                LDL->getBasePtr(), LDL->getPointerInfo(),
12577                                LDL->getAlignment());
12578     SmallVector<int, 16> Ops;
12579     for (int i = N->getNumOperands() - 1; i >= 0; i--)
12580       Ops.push_back(i);
12581 
12582     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
12583                                 DAG.getUNDEF(N->getValueType(0)), Ops);
12584   }
12585   return SDValue();
12586 }
12587 
12588 // This function adds the required vector_shuffle needed to get
12589 // the elements of the vector extract in the correct position
12590 // as specified by the CorrectElems encoding.
12591 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
12592                                       SDValue Input, uint64_t Elems,
12593                                       uint64_t CorrectElems) {
12594   SDLoc dl(N);
12595 
12596   unsigned NumElems = Input.getValueType().getVectorNumElements();
12597   SmallVector<int, 16> ShuffleMask(NumElems, -1);
12598 
12599   // Knowing the element indices being extracted from the original
12600   // vector and the order in which they're being inserted, just put
12601   // them at element indices required for the instruction.
12602   for (unsigned i = 0; i < N->getNumOperands(); i++) {
12603     if (DAG.getDataLayout().isLittleEndian())
12604       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
12605     else
12606       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
12607     CorrectElems = CorrectElems >> 8;
12608     Elems = Elems >> 8;
12609   }
12610 
12611   SDValue Shuffle =
12612       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
12613                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
12614 
12615   EVT Ty = N->getValueType(0);
12616   SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
12617   return BV;
12618 }
12619 
12620 // Look for build vector patterns where input operands come from sign
12621 // extended vector_extract elements of specific indices. If the correct indices
12622 // aren't used, add a vector shuffle to fix up the indices and create a new
12623 // PPCISD:SExtVElems node which selects the vector sign extend instructions
12624 // during instruction selection.
12625 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
12626   // This array encodes the indices that the vector sign extend instructions
12627   // extract from when extending from one type to another for both BE and LE.
12628   // The right nibble of each byte corresponds to the LE incides.
12629   // and the left nibble of each byte corresponds to the BE incides.
12630   // For example: 0x3074B8FC  byte->word
12631   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
12632   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
12633   // For example: 0x000070F8  byte->double word
12634   // For LE: the allowed indices are: 0x0,0x8
12635   // For BE: the allowed indices are: 0x7,0xF
12636   uint64_t TargetElems[] = {
12637       0x3074B8FC, // b->w
12638       0x000070F8, // b->d
12639       0x10325476, // h->w
12640       0x00003074, // h->d
12641       0x00001032, // w->d
12642   };
12643 
12644   uint64_t Elems = 0;
12645   int Index;
12646   SDValue Input;
12647 
12648   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
12649     if (!Op)
12650       return false;
12651     if (Op.getOpcode() != ISD::SIGN_EXTEND &&
12652         Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
12653       return false;
12654 
12655     // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
12656     // of the right width.
12657     SDValue Extract = Op.getOperand(0);
12658     if (Extract.getOpcode() == ISD::ANY_EXTEND)
12659       Extract = Extract.getOperand(0);
12660     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12661       return false;
12662 
12663     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
12664     if (!ExtOp)
12665       return false;
12666 
12667     Index = ExtOp->getZExtValue();
12668     if (Input && Input != Extract.getOperand(0))
12669       return false;
12670 
12671     if (!Input)
12672       Input = Extract.getOperand(0);
12673 
12674     Elems = Elems << 8;
12675     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
12676     Elems |= Index;
12677 
12678     return true;
12679   };
12680 
12681   // If the build vector operands aren't sign extended vector extracts,
12682   // of the same input vector, then return.
12683   for (unsigned i = 0; i < N->getNumOperands(); i++) {
12684     if (!isSExtOfVecExtract(N->getOperand(i))) {
12685       return SDValue();
12686     }
12687   }
12688 
12689   // If the vector extract indicies are not correct, add the appropriate
12690   // vector_shuffle.
12691   int TgtElemArrayIdx;
12692   int InputSize = Input.getValueType().getScalarSizeInBits();
12693   int OutputSize = N->getValueType(0).getScalarSizeInBits();
12694   if (InputSize + OutputSize == 40)
12695     TgtElemArrayIdx = 0;
12696   else if (InputSize + OutputSize == 72)
12697     TgtElemArrayIdx = 1;
12698   else if (InputSize + OutputSize == 48)
12699     TgtElemArrayIdx = 2;
12700   else if (InputSize + OutputSize == 80)
12701     TgtElemArrayIdx = 3;
12702   else if (InputSize + OutputSize == 96)
12703     TgtElemArrayIdx = 4;
12704   else
12705     return SDValue();
12706 
12707   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
12708   CorrectElems = DAG.getDataLayout().isLittleEndian()
12709                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
12710                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
12711   if (Elems != CorrectElems) {
12712     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
12713   }
12714 
12715   // Regular lowering will catch cases where a shuffle is not needed.
12716   return SDValue();
12717 }
12718 
12719 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
12720                                                  DAGCombinerInfo &DCI) const {
12721   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12722          "Should be called with a BUILD_VECTOR node");
12723 
12724   SelectionDAG &DAG = DCI.DAG;
12725   SDLoc dl(N);
12726 
12727   if (!Subtarget.hasVSX())
12728     return SDValue();
12729 
12730   // The target independent DAG combiner will leave a build_vector of
12731   // float-to-int conversions intact. We can generate MUCH better code for
12732   // a float-to-int conversion of a vector of floats.
12733   SDValue FirstInput = N->getOperand(0);
12734   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
12735     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
12736     if (Reduced)
12737       return Reduced;
12738   }
12739 
12740   // If we're building a vector out of consecutive loads, just load that
12741   // vector type.
12742   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
12743   if (Reduced)
12744     return Reduced;
12745 
12746   // If we're building a vector out of extended elements from another vector
12747   // we have P9 vector integer extend instructions. The code assumes legal
12748   // input types (i.e. it can't handle things like v4i16) so do not run before
12749   // legalization.
12750   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
12751     Reduced = combineBVOfVecSExt(N, DAG);
12752     if (Reduced)
12753       return Reduced;
12754   }
12755 
12756 
12757   if (N->getValueType(0) != MVT::v2f64)
12758     return SDValue();
12759 
12760   // Looking for:
12761   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
12762   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
12763       FirstInput.getOpcode() != ISD::UINT_TO_FP)
12764     return SDValue();
12765   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
12766       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
12767     return SDValue();
12768   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
12769     return SDValue();
12770 
12771   SDValue Ext1 = FirstInput.getOperand(0);
12772   SDValue Ext2 = N->getOperand(1).getOperand(0);
12773   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12774      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12775     return SDValue();
12776 
12777   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
12778   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
12779   if (!Ext1Op || !Ext2Op)
12780     return SDValue();
12781   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
12782       Ext1.getOperand(0) != Ext2.getOperand(0))
12783     return SDValue();
12784 
12785   int FirstElem = Ext1Op->getZExtValue();
12786   int SecondElem = Ext2Op->getZExtValue();
12787   int SubvecIdx;
12788   if (FirstElem == 0 && SecondElem == 1)
12789     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
12790   else if (FirstElem == 2 && SecondElem == 3)
12791     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
12792   else
12793     return SDValue();
12794 
12795   SDValue SrcVec = Ext1.getOperand(0);
12796   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
12797     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
12798   return DAG.getNode(NodeType, dl, MVT::v2f64,
12799                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
12800 }
12801 
12802 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
12803                                               DAGCombinerInfo &DCI) const {
12804   assert((N->getOpcode() == ISD::SINT_TO_FP ||
12805           N->getOpcode() == ISD::UINT_TO_FP) &&
12806          "Need an int -> FP conversion node here");
12807 
12808   if (useSoftFloat() || !Subtarget.has64BitSupport())
12809     return SDValue();
12810 
12811   SelectionDAG &DAG = DCI.DAG;
12812   SDLoc dl(N);
12813   SDValue Op(N, 0);
12814 
12815   // Don't handle ppc_fp128 here or conversions that are out-of-range capable
12816   // from the hardware.
12817   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
12818     return SDValue();
12819   if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
12820       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
12821     return SDValue();
12822 
12823   SDValue FirstOperand(Op.getOperand(0));
12824   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
12825     (FirstOperand.getValueType() == MVT::i8 ||
12826      FirstOperand.getValueType() == MVT::i16);
12827   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
12828     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
12829     bool DstDouble = Op.getValueType() == MVT::f64;
12830     unsigned ConvOp = Signed ?
12831       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
12832       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
12833     SDValue WidthConst =
12834       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
12835                             dl, false);
12836     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
12837     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
12838     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
12839                                          DAG.getVTList(MVT::f64, MVT::Other),
12840                                          Ops, MVT::i8, LDN->getMemOperand());
12841 
12842     // For signed conversion, we need to sign-extend the value in the VSR
12843     if (Signed) {
12844       SDValue ExtOps[] = { Ld, WidthConst };
12845       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
12846       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
12847     } else
12848       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
12849   }
12850 
12851 
12852   // For i32 intermediate values, unfortunately, the conversion functions
12853   // leave the upper 32 bits of the value are undefined. Within the set of
12854   // scalar instructions, we have no method for zero- or sign-extending the
12855   // value. Thus, we cannot handle i32 intermediate values here.
12856   if (Op.getOperand(0).getValueType() == MVT::i32)
12857     return SDValue();
12858 
12859   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
12860          "UINT_TO_FP is supported only with FPCVT");
12861 
12862   // If we have FCFIDS, then use it when converting to single-precision.
12863   // Otherwise, convert to double-precision and then round.
12864   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
12865                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
12866                                                             : PPCISD::FCFIDS)
12867                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
12868                                                             : PPCISD::FCFID);
12869   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
12870                   ? MVT::f32
12871                   : MVT::f64;
12872 
12873   // If we're converting from a float, to an int, and back to a float again,
12874   // then we don't need the store/load pair at all.
12875   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
12876        Subtarget.hasFPCVT()) ||
12877       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
12878     SDValue Src = Op.getOperand(0).getOperand(0);
12879     if (Src.getValueType() == MVT::f32) {
12880       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
12881       DCI.AddToWorklist(Src.getNode());
12882     } else if (Src.getValueType() != MVT::f64) {
12883       // Make sure that we don't pick up a ppc_fp128 source value.
12884       return SDValue();
12885     }
12886 
12887     unsigned FCTOp =
12888       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
12889                                                         PPCISD::FCTIDUZ;
12890 
12891     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
12892     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
12893 
12894     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
12895       FP = DAG.getNode(ISD::FP_ROUND, dl,
12896                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
12897       DCI.AddToWorklist(FP.getNode());
12898     }
12899 
12900     return FP;
12901   }
12902 
12903   return SDValue();
12904 }
12905 
12906 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
12907 // builtins) into loads with swaps.
12908 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
12909                                               DAGCombinerInfo &DCI) const {
12910   SelectionDAG &DAG = DCI.DAG;
12911   SDLoc dl(N);
12912   SDValue Chain;
12913   SDValue Base;
12914   MachineMemOperand *MMO;
12915 
12916   switch (N->getOpcode()) {
12917   default:
12918     llvm_unreachable("Unexpected opcode for little endian VSX load");
12919   case ISD::LOAD: {
12920     LoadSDNode *LD = cast<LoadSDNode>(N);
12921     Chain = LD->getChain();
12922     Base = LD->getBasePtr();
12923     MMO = LD->getMemOperand();
12924     // If the MMO suggests this isn't a load of a full vector, leave
12925     // things alone.  For a built-in, we have to make the change for
12926     // correctness, so if there is a size problem that will be a bug.
12927     if (MMO->getSize() < 16)
12928       return SDValue();
12929     break;
12930   }
12931   case ISD::INTRINSIC_W_CHAIN: {
12932     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
12933     Chain = Intrin->getChain();
12934     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
12935     // us what we want. Get operand 2 instead.
12936     Base = Intrin->getOperand(2);
12937     MMO = Intrin->getMemOperand();
12938     break;
12939   }
12940   }
12941 
12942   MVT VecTy = N->getValueType(0).getSimpleVT();
12943 
12944   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
12945   // aligned and the type is a vector with elements up to 4 bytes
12946   if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
12947       && VecTy.getScalarSizeInBits() <= 32 ) {
12948     return SDValue();
12949   }
12950 
12951   SDValue LoadOps[] = { Chain, Base };
12952   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
12953                                          DAG.getVTList(MVT::v2f64, MVT::Other),
12954                                          LoadOps, MVT::v2f64, MMO);
12955 
12956   DCI.AddToWorklist(Load.getNode());
12957   Chain = Load.getValue(1);
12958   SDValue Swap = DAG.getNode(
12959       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
12960   DCI.AddToWorklist(Swap.getNode());
12961 
12962   // Add a bitcast if the resulting load type doesn't match v2f64.
12963   if (VecTy != MVT::v2f64) {
12964     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
12965     DCI.AddToWorklist(N.getNode());
12966     // Package {bitcast value, swap's chain} to match Load's shape.
12967     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
12968                        N, Swap.getValue(1));
12969   }
12970 
12971   return Swap;
12972 }
12973 
12974 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
12975 // builtins) into stores with swaps.
12976 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
12977                                                DAGCombinerInfo &DCI) const {
12978   SelectionDAG &DAG = DCI.DAG;
12979   SDLoc dl(N);
12980   SDValue Chain;
12981   SDValue Base;
12982   unsigned SrcOpnd;
12983   MachineMemOperand *MMO;
12984 
12985   switch (N->getOpcode()) {
12986   default:
12987     llvm_unreachable("Unexpected opcode for little endian VSX store");
12988   case ISD::STORE: {
12989     StoreSDNode *ST = cast<StoreSDNode>(N);
12990     Chain = ST->getChain();
12991     Base = ST->getBasePtr();
12992     MMO = ST->getMemOperand();
12993     SrcOpnd = 1;
12994     // If the MMO suggests this isn't a store of a full vector, leave
12995     // things alone.  For a built-in, we have to make the change for
12996     // correctness, so if there is a size problem that will be a bug.
12997     if (MMO->getSize() < 16)
12998       return SDValue();
12999     break;
13000   }
13001   case ISD::INTRINSIC_VOID: {
13002     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13003     Chain = Intrin->getChain();
13004     // Intrin->getBasePtr() oddly does not get what we want.
13005     Base = Intrin->getOperand(3);
13006     MMO = Intrin->getMemOperand();
13007     SrcOpnd = 2;
13008     break;
13009   }
13010   }
13011 
13012   SDValue Src = N->getOperand(SrcOpnd);
13013   MVT VecTy = Src.getValueType().getSimpleVT();
13014 
13015   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
13016   // aligned and the type is a vector with elements up to 4 bytes
13017   if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
13018       && VecTy.getScalarSizeInBits() <= 32 ) {
13019     return SDValue();
13020   }
13021 
13022   // All stores are done as v2f64 and possible bit cast.
13023   if (VecTy != MVT::v2f64) {
13024     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13025     DCI.AddToWorklist(Src.getNode());
13026   }
13027 
13028   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13029                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13030   DCI.AddToWorklist(Swap.getNode());
13031   Chain = Swap.getValue(1);
13032   SDValue StoreOps[] = { Chain, Swap, Base };
13033   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
13034                                           DAG.getVTList(MVT::Other),
13035                                           StoreOps, VecTy, MMO);
13036   DCI.AddToWorklist(Store.getNode());
13037   return Store;
13038 }
13039 
13040 // Handle DAG combine for STORE (FP_TO_INT F).
13041 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
13042                                                DAGCombinerInfo &DCI) const {
13043 
13044   SelectionDAG &DAG = DCI.DAG;
13045   SDLoc dl(N);
13046   unsigned Opcode = N->getOperand(1).getOpcode();
13047 
13048   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
13049          && "Not a FP_TO_INT Instruction!");
13050 
13051   SDValue Val = N->getOperand(1).getOperand(0);
13052   EVT Op1VT = N->getOperand(1).getValueType();
13053   EVT ResVT = Val.getValueType();
13054 
13055   // Floating point types smaller than 32 bits are not legal on Power.
13056   if (ResVT.getScalarSizeInBits() < 32)
13057     return SDValue();
13058 
13059   // Only perform combine for conversion to i64/i32 or power9 i16/i8.
13060   bool ValidTypeForStoreFltAsInt =
13061         (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
13062          (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
13063 
13064   if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
13065       cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
13066     return SDValue();
13067 
13068   // Extend f32 values to f64
13069   if (ResVT.getScalarSizeInBits() == 32) {
13070     Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
13071     DCI.AddToWorklist(Val.getNode());
13072   }
13073 
13074   // Set signed or unsigned conversion opcode.
13075   unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
13076                           PPCISD::FP_TO_SINT_IN_VSR :
13077                           PPCISD::FP_TO_UINT_IN_VSR;
13078 
13079   Val = DAG.getNode(ConvOpcode,
13080                     dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
13081   DCI.AddToWorklist(Val.getNode());
13082 
13083   // Set number of bytes being converted.
13084   unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
13085   SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
13086                     DAG.getIntPtrConstant(ByteSize, dl, false),
13087                     DAG.getValueType(Op1VT) };
13088 
13089   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
13090           DAG.getVTList(MVT::Other), Ops,
13091           cast<StoreSDNode>(N)->getMemoryVT(),
13092           cast<StoreSDNode>(N)->getMemOperand());
13093 
13094   DCI.AddToWorklist(Val.getNode());
13095   return Val;
13096 }
13097 
13098 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
13099                                              DAGCombinerInfo &DCI) const {
13100   SelectionDAG &DAG = DCI.DAG;
13101   SDLoc dl(N);
13102   switch (N->getOpcode()) {
13103   default: break;
13104   case ISD::ADD:
13105     return combineADD(N, DCI);
13106   case ISD::SHL:
13107     return combineSHL(N, DCI);
13108   case ISD::SRA:
13109     return combineSRA(N, DCI);
13110   case ISD::SRL:
13111     return combineSRL(N, DCI);
13112   case ISD::MUL:
13113     return combineMUL(N, DCI);
13114   case PPCISD::SHL:
13115     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
13116         return N->getOperand(0);
13117     break;
13118   case PPCISD::SRL:
13119     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
13120         return N->getOperand(0);
13121     break;
13122   case PPCISD::SRA:
13123     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
13124       if (C->isNullValue() ||   //  0 >>s V -> 0.
13125           C->isAllOnesValue())    // -1 >>s V -> -1.
13126         return N->getOperand(0);
13127     }
13128     break;
13129   case ISD::SIGN_EXTEND:
13130   case ISD::ZERO_EXTEND:
13131   case ISD::ANY_EXTEND:
13132     return DAGCombineExtBoolTrunc(N, DCI);
13133   case ISD::TRUNCATE:
13134     return combineTRUNCATE(N, DCI);
13135   case ISD::SETCC:
13136     if (SDValue CSCC = combineSetCC(N, DCI))
13137       return CSCC;
13138     LLVM_FALLTHROUGH;
13139   case ISD::SELECT_CC:
13140     return DAGCombineTruncBoolExt(N, DCI);
13141   case ISD::SINT_TO_FP:
13142   case ISD::UINT_TO_FP:
13143     return combineFPToIntToFP(N, DCI);
13144   case ISD::STORE: {
13145 
13146     EVT Op1VT = N->getOperand(1).getValueType();
13147     unsigned Opcode = N->getOperand(1).getOpcode();
13148 
13149     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
13150       SDValue Val= combineStoreFPToInt(N, DCI);
13151       if (Val)
13152         return Val;
13153     }
13154 
13155     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
13156     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
13157         N->getOperand(1).getNode()->hasOneUse() &&
13158         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
13159          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
13160 
13161       // STBRX can only handle simple types and it makes no sense to store less
13162       // two bytes in byte-reversed order.
13163       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
13164       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
13165         break;
13166 
13167       SDValue BSwapOp = N->getOperand(1).getOperand(0);
13168       // Do an any-extend to 32-bits if this is a half-word input.
13169       if (BSwapOp.getValueType() == MVT::i16)
13170         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
13171 
13172       // If the type of BSWAP operand is wider than stored memory width
13173       // it need to be shifted to the right side before STBRX.
13174       if (Op1VT.bitsGT(mVT)) {
13175         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
13176         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
13177                               DAG.getConstant(Shift, dl, MVT::i32));
13178         // Need to truncate if this is a bswap of i64 stored as i32/i16.
13179         if (Op1VT == MVT::i64)
13180           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
13181       }
13182 
13183       SDValue Ops[] = {
13184         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
13185       };
13186       return
13187         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
13188                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
13189                                 cast<StoreSDNode>(N)->getMemOperand());
13190     }
13191 
13192     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
13193     // So it can increase the chance of CSE constant construction.
13194     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
13195         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
13196       // Need to sign-extended to 64-bits to handle negative values.
13197       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
13198       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
13199                                     MemVT.getSizeInBits());
13200       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
13201 
13202       // DAG.getTruncStore() can't be used here because it doesn't accept
13203       // the general (base + offset) addressing mode.
13204       // So we use UpdateNodeOperands and setTruncatingStore instead.
13205       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
13206                              N->getOperand(3));
13207       cast<StoreSDNode>(N)->setTruncatingStore(true);
13208       return SDValue(N, 0);
13209     }
13210 
13211     // For little endian, VSX stores require generating xxswapd/lxvd2x.
13212     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13213     if (Op1VT.isSimple()) {
13214       MVT StoreVT = Op1VT.getSimpleVT();
13215       if (Subtarget.needsSwapsForVSXMemOps() &&
13216           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
13217            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
13218         return expandVSXStoreForLE(N, DCI);
13219     }
13220     break;
13221   }
13222   case ISD::LOAD: {
13223     LoadSDNode *LD = cast<LoadSDNode>(N);
13224     EVT VT = LD->getValueType(0);
13225 
13226     // For little endian, VSX loads require generating lxvd2x/xxswapd.
13227     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13228     if (VT.isSimple()) {
13229       MVT LoadVT = VT.getSimpleVT();
13230       if (Subtarget.needsSwapsForVSXMemOps() &&
13231           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
13232            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
13233         return expandVSXLoadForLE(N, DCI);
13234     }
13235 
13236     // We sometimes end up with a 64-bit integer load, from which we extract
13237     // two single-precision floating-point numbers. This happens with
13238     // std::complex<float>, and other similar structures, because of the way we
13239     // canonicalize structure copies. However, if we lack direct moves,
13240     // then the final bitcasts from the extracted integer values to the
13241     // floating-point numbers turn into store/load pairs. Even with direct moves,
13242     // just loading the two floating-point numbers is likely better.
13243     auto ReplaceTwoFloatLoad = [&]() {
13244       if (VT != MVT::i64)
13245         return false;
13246 
13247       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
13248           LD->isVolatile())
13249         return false;
13250 
13251       //  We're looking for a sequence like this:
13252       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
13253       //      t16: i64 = srl t13, Constant:i32<32>
13254       //    t17: i32 = truncate t16
13255       //  t18: f32 = bitcast t17
13256       //    t19: i32 = truncate t13
13257       //  t20: f32 = bitcast t19
13258 
13259       if (!LD->hasNUsesOfValue(2, 0))
13260         return false;
13261 
13262       auto UI = LD->use_begin();
13263       while (UI.getUse().getResNo() != 0) ++UI;
13264       SDNode *Trunc = *UI++;
13265       while (UI.getUse().getResNo() != 0) ++UI;
13266       SDNode *RightShift = *UI;
13267       if (Trunc->getOpcode() != ISD::TRUNCATE)
13268         std::swap(Trunc, RightShift);
13269 
13270       if (Trunc->getOpcode() != ISD::TRUNCATE ||
13271           Trunc->getValueType(0) != MVT::i32 ||
13272           !Trunc->hasOneUse())
13273         return false;
13274       if (RightShift->getOpcode() != ISD::SRL ||
13275           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
13276           RightShift->getConstantOperandVal(1) != 32 ||
13277           !RightShift->hasOneUse())
13278         return false;
13279 
13280       SDNode *Trunc2 = *RightShift->use_begin();
13281       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
13282           Trunc2->getValueType(0) != MVT::i32 ||
13283           !Trunc2->hasOneUse())
13284         return false;
13285 
13286       SDNode *Bitcast = *Trunc->use_begin();
13287       SDNode *Bitcast2 = *Trunc2->use_begin();
13288 
13289       if (Bitcast->getOpcode() != ISD::BITCAST ||
13290           Bitcast->getValueType(0) != MVT::f32)
13291         return false;
13292       if (Bitcast2->getOpcode() != ISD::BITCAST ||
13293           Bitcast2->getValueType(0) != MVT::f32)
13294         return false;
13295 
13296       if (Subtarget.isLittleEndian())
13297         std::swap(Bitcast, Bitcast2);
13298 
13299       // Bitcast has the second float (in memory-layout order) and Bitcast2
13300       // has the first one.
13301 
13302       SDValue BasePtr = LD->getBasePtr();
13303       if (LD->isIndexed()) {
13304         assert(LD->getAddressingMode() == ISD::PRE_INC &&
13305                "Non-pre-inc AM on PPC?");
13306         BasePtr =
13307           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
13308                       LD->getOffset());
13309       }
13310 
13311       auto MMOFlags =
13312           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
13313       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
13314                                       LD->getPointerInfo(), LD->getAlignment(),
13315                                       MMOFlags, LD->getAAInfo());
13316       SDValue AddPtr =
13317         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
13318                     BasePtr, DAG.getIntPtrConstant(4, dl));
13319       SDValue FloatLoad2 = DAG.getLoad(
13320           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
13321           LD->getPointerInfo().getWithOffset(4),
13322           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
13323 
13324       if (LD->isIndexed()) {
13325         // Note that DAGCombine should re-form any pre-increment load(s) from
13326         // what is produced here if that makes sense.
13327         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
13328       }
13329 
13330       DCI.CombineTo(Bitcast2, FloatLoad);
13331       DCI.CombineTo(Bitcast, FloatLoad2);
13332 
13333       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
13334                                     SDValue(FloatLoad2.getNode(), 1));
13335       return true;
13336     };
13337 
13338     if (ReplaceTwoFloatLoad())
13339       return SDValue(N, 0);
13340 
13341     EVT MemVT = LD->getMemoryVT();
13342     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
13343     unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
13344     Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
13345     unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
13346     if (LD->isUnindexed() && VT.isVector() &&
13347         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
13348           // P8 and later hardware should just use LOAD.
13349           !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
13350                                        VT == MVT::v4i32 || VT == MVT::v4f32)) ||
13351          (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
13352           LD->getAlignment() >= ScalarABIAlignment)) &&
13353         LD->getAlignment() < ABIAlignment) {
13354       // This is a type-legal unaligned Altivec or QPX load.
13355       SDValue Chain = LD->getChain();
13356       SDValue Ptr = LD->getBasePtr();
13357       bool isLittleEndian = Subtarget.isLittleEndian();
13358 
13359       // This implements the loading of unaligned vectors as described in
13360       // the venerable Apple Velocity Engine overview. Specifically:
13361       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
13362       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
13363       //
13364       // The general idea is to expand a sequence of one or more unaligned
13365       // loads into an alignment-based permutation-control instruction (lvsl
13366       // or lvsr), a series of regular vector loads (which always truncate
13367       // their input address to an aligned address), and a series of
13368       // permutations.  The results of these permutations are the requested
13369       // loaded values.  The trick is that the last "extra" load is not taken
13370       // from the address you might suspect (sizeof(vector) bytes after the
13371       // last requested load), but rather sizeof(vector) - 1 bytes after the
13372       // last requested vector. The point of this is to avoid a page fault if
13373       // the base address happened to be aligned. This works because if the
13374       // base address is aligned, then adding less than a full vector length
13375       // will cause the last vector in the sequence to be (re)loaded.
13376       // Otherwise, the next vector will be fetched as you might suspect was
13377       // necessary.
13378 
13379       // We might be able to reuse the permutation generation from
13380       // a different base address offset from this one by an aligned amount.
13381       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
13382       // optimization later.
13383       Intrinsic::ID Intr, IntrLD, IntrPerm;
13384       MVT PermCntlTy, PermTy, LDTy;
13385       if (Subtarget.hasAltivec()) {
13386         Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
13387                                  Intrinsic::ppc_altivec_lvsl;
13388         IntrLD = Intrinsic::ppc_altivec_lvx;
13389         IntrPerm = Intrinsic::ppc_altivec_vperm;
13390         PermCntlTy = MVT::v16i8;
13391         PermTy = MVT::v4i32;
13392         LDTy = MVT::v4i32;
13393       } else {
13394         Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
13395                                        Intrinsic::ppc_qpx_qvlpcls;
13396         IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
13397                                        Intrinsic::ppc_qpx_qvlfs;
13398         IntrPerm = Intrinsic::ppc_qpx_qvfperm;
13399         PermCntlTy = MVT::v4f64;
13400         PermTy = MVT::v4f64;
13401         LDTy = MemVT.getSimpleVT();
13402       }
13403 
13404       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
13405 
13406       // Create the new MMO for the new base load. It is like the original MMO,
13407       // but represents an area in memory almost twice the vector size centered
13408       // on the original address. If the address is unaligned, we might start
13409       // reading up to (sizeof(vector)-1) bytes below the address of the
13410       // original unaligned load.
13411       MachineFunction &MF = DAG.getMachineFunction();
13412       MachineMemOperand *BaseMMO =
13413         MF.getMachineMemOperand(LD->getMemOperand(),
13414                                 -(long)MemVT.getStoreSize()+1,
13415                                 2*MemVT.getStoreSize()-1);
13416 
13417       // Create the new base load.
13418       SDValue LDXIntID =
13419           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
13420       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
13421       SDValue BaseLoad =
13422         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
13423                                 DAG.getVTList(PermTy, MVT::Other),
13424                                 BaseLoadOps, LDTy, BaseMMO);
13425 
13426       // Note that the value of IncOffset (which is provided to the next
13427       // load's pointer info offset value, and thus used to calculate the
13428       // alignment), and the value of IncValue (which is actually used to
13429       // increment the pointer value) are different! This is because we
13430       // require the next load to appear to be aligned, even though it
13431       // is actually offset from the base pointer by a lesser amount.
13432       int IncOffset = VT.getSizeInBits() / 8;
13433       int IncValue = IncOffset;
13434 
13435       // Walk (both up and down) the chain looking for another load at the real
13436       // (aligned) offset (the alignment of the other load does not matter in
13437       // this case). If found, then do not use the offset reduction trick, as
13438       // that will prevent the loads from being later combined (as they would
13439       // otherwise be duplicates).
13440       if (!findConsecutiveLoad(LD, DAG))
13441         --IncValue;
13442 
13443       SDValue Increment =
13444           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
13445       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
13446 
13447       MachineMemOperand *ExtraMMO =
13448         MF.getMachineMemOperand(LD->getMemOperand(),
13449                                 1, 2*MemVT.getStoreSize()-1);
13450       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
13451       SDValue ExtraLoad =
13452         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
13453                                 DAG.getVTList(PermTy, MVT::Other),
13454                                 ExtraLoadOps, LDTy, ExtraMMO);
13455 
13456       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
13457         BaseLoad.getValue(1), ExtraLoad.getValue(1));
13458 
13459       // Because vperm has a big-endian bias, we must reverse the order
13460       // of the input vectors and complement the permute control vector
13461       // when generating little endian code.  We have already handled the
13462       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
13463       // and ExtraLoad here.
13464       SDValue Perm;
13465       if (isLittleEndian)
13466         Perm = BuildIntrinsicOp(IntrPerm,
13467                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
13468       else
13469         Perm = BuildIntrinsicOp(IntrPerm,
13470                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
13471 
13472       if (VT != PermTy)
13473         Perm = Subtarget.hasAltivec() ?
13474                  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
13475                  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
13476                                DAG.getTargetConstant(1, dl, MVT::i64));
13477                                // second argument is 1 because this rounding
13478                                // is always exact.
13479 
13480       // The output of the permutation is our loaded result, the TokenFactor is
13481       // our new chain.
13482       DCI.CombineTo(N, Perm, TF);
13483       return SDValue(N, 0);
13484     }
13485     }
13486     break;
13487     case ISD::INTRINSIC_WO_CHAIN: {
13488       bool isLittleEndian = Subtarget.isLittleEndian();
13489       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
13490       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
13491                                            : Intrinsic::ppc_altivec_lvsl);
13492       if ((IID == Intr ||
13493            IID == Intrinsic::ppc_qpx_qvlpcld  ||
13494            IID == Intrinsic::ppc_qpx_qvlpcls) &&
13495         N->getOperand(1)->getOpcode() == ISD::ADD) {
13496         SDValue Add = N->getOperand(1);
13497 
13498         int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
13499                    5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
13500 
13501         if (DAG.MaskedValueIsZero(Add->getOperand(1),
13502                                   APInt::getAllOnesValue(Bits /* alignment */)
13503                                       .zext(Add.getScalarValueSizeInBits()))) {
13504           SDNode *BasePtr = Add->getOperand(0).getNode();
13505           for (SDNode::use_iterator UI = BasePtr->use_begin(),
13506                                     UE = BasePtr->use_end();
13507                UI != UE; ++UI) {
13508             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13509                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
13510               // We've found another LVSL/LVSR, and this address is an aligned
13511               // multiple of that one. The results will be the same, so use the
13512               // one we've just found instead.
13513 
13514               return SDValue(*UI, 0);
13515             }
13516           }
13517         }
13518 
13519         if (isa<ConstantSDNode>(Add->getOperand(1))) {
13520           SDNode *BasePtr = Add->getOperand(0).getNode();
13521           for (SDNode::use_iterator UI = BasePtr->use_begin(),
13522                UE = BasePtr->use_end(); UI != UE; ++UI) {
13523             if (UI->getOpcode() == ISD::ADD &&
13524                 isa<ConstantSDNode>(UI->getOperand(1)) &&
13525                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
13526                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
13527                 (1ULL << Bits) == 0) {
13528               SDNode *OtherAdd = *UI;
13529               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
13530                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
13531                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13532                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
13533                   return SDValue(*VI, 0);
13534                 }
13535               }
13536             }
13537           }
13538         }
13539       }
13540 
13541       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
13542       // Expose the vabsduw/h/b opportunity for down stream
13543       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
13544           (IID == Intrinsic::ppc_altivec_vmaxsw ||
13545            IID == Intrinsic::ppc_altivec_vmaxsh ||
13546            IID == Intrinsic::ppc_altivec_vmaxsb)) {
13547         SDValue V1 = N->getOperand(1);
13548         SDValue V2 = N->getOperand(2);
13549         if ((V1.getSimpleValueType() == MVT::v4i32 ||
13550              V1.getSimpleValueType() == MVT::v8i16 ||
13551              V1.getSimpleValueType() == MVT::v16i8) &&
13552             V1.getSimpleValueType() == V2.getSimpleValueType()) {
13553           // (0-a, a)
13554           if (V1.getOpcode() == ISD::SUB &&
13555               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
13556               V1.getOperand(1) == V2) {
13557             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
13558           }
13559           // (a, 0-a)
13560           if (V2.getOpcode() == ISD::SUB &&
13561               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
13562               V2.getOperand(1) == V1) {
13563             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
13564           }
13565           // (x-y, y-x)
13566           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
13567               V1.getOperand(0) == V2.getOperand(1) &&
13568               V1.getOperand(1) == V2.getOperand(0)) {
13569             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
13570           }
13571         }
13572       }
13573     }
13574 
13575     break;
13576   case ISD::INTRINSIC_W_CHAIN:
13577     // For little endian, VSX loads require generating lxvd2x/xxswapd.
13578     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13579     if (Subtarget.needsSwapsForVSXMemOps()) {
13580       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13581       default:
13582         break;
13583       case Intrinsic::ppc_vsx_lxvw4x:
13584       case Intrinsic::ppc_vsx_lxvd2x:
13585         return expandVSXLoadForLE(N, DCI);
13586       }
13587     }
13588     break;
13589   case ISD::INTRINSIC_VOID:
13590     // For little endian, VSX stores require generating xxswapd/stxvd2x.
13591     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13592     if (Subtarget.needsSwapsForVSXMemOps()) {
13593       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13594       default:
13595         break;
13596       case Intrinsic::ppc_vsx_stxvw4x:
13597       case Intrinsic::ppc_vsx_stxvd2x:
13598         return expandVSXStoreForLE(N, DCI);
13599       }
13600     }
13601     break;
13602   case ISD::BSWAP:
13603     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
13604     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
13605         N->getOperand(0).hasOneUse() &&
13606         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
13607          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
13608           N->getValueType(0) == MVT::i64))) {
13609       SDValue Load = N->getOperand(0);
13610       LoadSDNode *LD = cast<LoadSDNode>(Load);
13611       // Create the byte-swapping load.
13612       SDValue Ops[] = {
13613         LD->getChain(),    // Chain
13614         LD->getBasePtr(),  // Ptr
13615         DAG.getValueType(N->getValueType(0)) // VT
13616       };
13617       SDValue BSLoad =
13618         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
13619                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
13620                                               MVT::i64 : MVT::i32, MVT::Other),
13621                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
13622 
13623       // If this is an i16 load, insert the truncate.
13624       SDValue ResVal = BSLoad;
13625       if (N->getValueType(0) == MVT::i16)
13626         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
13627 
13628       // First, combine the bswap away.  This makes the value produced by the
13629       // load dead.
13630       DCI.CombineTo(N, ResVal);
13631 
13632       // Next, combine the load away, we give it a bogus result value but a real
13633       // chain result.  The result value is dead because the bswap is dead.
13634       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
13635 
13636       // Return N so it doesn't get rechecked!
13637       return SDValue(N, 0);
13638     }
13639     break;
13640   case PPCISD::VCMP:
13641     // If a VCMPo node already exists with exactly the same operands as this
13642     // node, use its result instead of this node (VCMPo computes both a CR6 and
13643     // a normal output).
13644     //
13645     if (!N->getOperand(0).hasOneUse() &&
13646         !N->getOperand(1).hasOneUse() &&
13647         !N->getOperand(2).hasOneUse()) {
13648 
13649       // Scan all of the users of the LHS, looking for VCMPo's that match.
13650       SDNode *VCMPoNode = nullptr;
13651 
13652       SDNode *LHSN = N->getOperand(0).getNode();
13653       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
13654            UI != E; ++UI)
13655         if (UI->getOpcode() == PPCISD::VCMPo &&
13656             UI->getOperand(1) == N->getOperand(1) &&
13657             UI->getOperand(2) == N->getOperand(2) &&
13658             UI->getOperand(0) == N->getOperand(0)) {
13659           VCMPoNode = *UI;
13660           break;
13661         }
13662 
13663       // If there is no VCMPo node, or if the flag value has a single use, don't
13664       // transform this.
13665       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
13666         break;
13667 
13668       // Look at the (necessarily single) use of the flag value.  If it has a
13669       // chain, this transformation is more complex.  Note that multiple things
13670       // could use the value result, which we should ignore.
13671       SDNode *FlagUser = nullptr;
13672       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
13673            FlagUser == nullptr; ++UI) {
13674         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
13675         SDNode *User = *UI;
13676         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
13677           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
13678             FlagUser = User;
13679             break;
13680           }
13681         }
13682       }
13683 
13684       // If the user is a MFOCRF instruction, we know this is safe.
13685       // Otherwise we give up for right now.
13686       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
13687         return SDValue(VCMPoNode, 0);
13688     }
13689     break;
13690   case ISD::BRCOND: {
13691     SDValue Cond = N->getOperand(1);
13692     SDValue Target = N->getOperand(2);
13693 
13694     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13695         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
13696           Intrinsic::loop_decrement) {
13697 
13698       // We now need to make the intrinsic dead (it cannot be instruction
13699       // selected).
13700       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
13701       assert(Cond.getNode()->hasOneUse() &&
13702              "Counter decrement has more than one use");
13703 
13704       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
13705                          N->getOperand(0), Target);
13706     }
13707   }
13708   break;
13709   case ISD::BR_CC: {
13710     // If this is a branch on an altivec predicate comparison, lower this so
13711     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
13712     // lowering is done pre-legalize, because the legalizer lowers the predicate
13713     // compare down to code that is difficult to reassemble.
13714     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
13715     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
13716 
13717     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
13718     // value. If so, pass-through the AND to get to the intrinsic.
13719     if (LHS.getOpcode() == ISD::AND &&
13720         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13721         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
13722           Intrinsic::loop_decrement &&
13723         isa<ConstantSDNode>(LHS.getOperand(1)) &&
13724         !isNullConstant(LHS.getOperand(1)))
13725       LHS = LHS.getOperand(0);
13726 
13727     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13728         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
13729           Intrinsic::loop_decrement &&
13730         isa<ConstantSDNode>(RHS)) {
13731       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
13732              "Counter decrement comparison is not EQ or NE");
13733 
13734       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
13735       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
13736                     (CC == ISD::SETNE && !Val);
13737 
13738       // We now need to make the intrinsic dead (it cannot be instruction
13739       // selected).
13740       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
13741       assert(LHS.getNode()->hasOneUse() &&
13742              "Counter decrement has more than one use");
13743 
13744       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
13745                          N->getOperand(0), N->getOperand(4));
13746     }
13747 
13748     int CompareOpc;
13749     bool isDot;
13750 
13751     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13752         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
13753         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
13754       assert(isDot && "Can't compare against a vector result!");
13755 
13756       // If this is a comparison against something other than 0/1, then we know
13757       // that the condition is never/always true.
13758       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
13759       if (Val != 0 && Val != 1) {
13760         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
13761           return N->getOperand(0);
13762         // Always !=, turn it into an unconditional branch.
13763         return DAG.getNode(ISD::BR, dl, MVT::Other,
13764                            N->getOperand(0), N->getOperand(4));
13765       }
13766 
13767       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
13768 
13769       // Create the PPCISD altivec 'dot' comparison node.
13770       SDValue Ops[] = {
13771         LHS.getOperand(2),  // LHS of compare
13772         LHS.getOperand(3),  // RHS of compare
13773         DAG.getConstant(CompareOpc, dl, MVT::i32)
13774       };
13775       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
13776       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
13777 
13778       // Unpack the result based on how the target uses it.
13779       PPC::Predicate CompOpc;
13780       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
13781       default:  // Can't happen, don't crash on invalid number though.
13782       case 0:   // Branch on the value of the EQ bit of CR6.
13783         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
13784         break;
13785       case 1:   // Branch on the inverted value of the EQ bit of CR6.
13786         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
13787         break;
13788       case 2:   // Branch on the value of the LT bit of CR6.
13789         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
13790         break;
13791       case 3:   // Branch on the inverted value of the LT bit of CR6.
13792         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
13793         break;
13794       }
13795 
13796       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
13797                          DAG.getConstant(CompOpc, dl, MVT::i32),
13798                          DAG.getRegister(PPC::CR6, MVT::i32),
13799                          N->getOperand(4), CompNode.getValue(1));
13800     }
13801     break;
13802   }
13803   case ISD::BUILD_VECTOR:
13804     return DAGCombineBuildVector(N, DCI);
13805   case ISD::ABS:
13806     return combineABS(N, DCI);
13807   case ISD::VSELECT:
13808     return combineVSelect(N, DCI);
13809   }
13810 
13811   return SDValue();
13812 }
13813 
13814 SDValue
13815 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
13816                                  SelectionDAG &DAG,
13817                                  SmallVectorImpl<SDNode *> &Created) const {
13818   // fold (sdiv X, pow2)
13819   EVT VT = N->getValueType(0);
13820   if (VT == MVT::i64 && !Subtarget.isPPC64())
13821     return SDValue();
13822   if ((VT != MVT::i32 && VT != MVT::i64) ||
13823       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
13824     return SDValue();
13825 
13826   SDLoc DL(N);
13827   SDValue N0 = N->getOperand(0);
13828 
13829   bool IsNegPow2 = (-Divisor).isPowerOf2();
13830   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
13831   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
13832 
13833   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
13834   Created.push_back(Op.getNode());
13835 
13836   if (IsNegPow2) {
13837     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
13838     Created.push_back(Op.getNode());
13839   }
13840 
13841   return Op;
13842 }
13843 
13844 //===----------------------------------------------------------------------===//
13845 // Inline Assembly Support
13846 //===----------------------------------------------------------------------===//
13847 
13848 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
13849                                                       KnownBits &Known,
13850                                                       const APInt &DemandedElts,
13851                                                       const SelectionDAG &DAG,
13852                                                       unsigned Depth) const {
13853   Known.resetAll();
13854   switch (Op.getOpcode()) {
13855   default: break;
13856   case PPCISD::LBRX: {
13857     // lhbrx is known to have the top bits cleared out.
13858     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
13859       Known.Zero = 0xFFFF0000;
13860     break;
13861   }
13862   case ISD::INTRINSIC_WO_CHAIN: {
13863     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
13864     default: break;
13865     case Intrinsic::ppc_altivec_vcmpbfp_p:
13866     case Intrinsic::ppc_altivec_vcmpeqfp_p:
13867     case Intrinsic::ppc_altivec_vcmpequb_p:
13868     case Intrinsic::ppc_altivec_vcmpequh_p:
13869     case Intrinsic::ppc_altivec_vcmpequw_p:
13870     case Intrinsic::ppc_altivec_vcmpequd_p:
13871     case Intrinsic::ppc_altivec_vcmpgefp_p:
13872     case Intrinsic::ppc_altivec_vcmpgtfp_p:
13873     case Intrinsic::ppc_altivec_vcmpgtsb_p:
13874     case Intrinsic::ppc_altivec_vcmpgtsh_p:
13875     case Intrinsic::ppc_altivec_vcmpgtsw_p:
13876     case Intrinsic::ppc_altivec_vcmpgtsd_p:
13877     case Intrinsic::ppc_altivec_vcmpgtub_p:
13878     case Intrinsic::ppc_altivec_vcmpgtuh_p:
13879     case Intrinsic::ppc_altivec_vcmpgtuw_p:
13880     case Intrinsic::ppc_altivec_vcmpgtud_p:
13881       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
13882       break;
13883     }
13884   }
13885   }
13886 }
13887 
13888 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
13889   switch (Subtarget.getDarwinDirective()) {
13890   default: break;
13891   case PPC::DIR_970:
13892   case PPC::DIR_PWR4:
13893   case PPC::DIR_PWR5:
13894   case PPC::DIR_PWR5X:
13895   case PPC::DIR_PWR6:
13896   case PPC::DIR_PWR6X:
13897   case PPC::DIR_PWR7:
13898   case PPC::DIR_PWR8:
13899   case PPC::DIR_PWR9: {
13900     if (!ML)
13901       break;
13902 
13903     if (!DisableInnermostLoopAlign32) {
13904       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
13905       // so that we can decrease cache misses and branch-prediction misses.
13906       // Actual alignment of the loop will depend on the hotness check and other
13907       // logic in alignBlocks.
13908       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
13909         return 5;
13910     }
13911 
13912     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13913 
13914     // For small loops (between 5 and 8 instructions), align to a 32-byte
13915     // boundary so that the entire loop fits in one instruction-cache line.
13916     uint64_t LoopSize = 0;
13917     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
13918       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
13919         LoopSize += TII->getInstSizeInBytes(*J);
13920         if (LoopSize > 32)
13921           break;
13922       }
13923 
13924     if (LoopSize > 16 && LoopSize <= 32)
13925       return 5;
13926 
13927     break;
13928   }
13929   }
13930 
13931   return TargetLowering::getPrefLoopAlignment(ML);
13932 }
13933 
13934 /// getConstraintType - Given a constraint, return the type of
13935 /// constraint it is for this target.
13936 PPCTargetLowering::ConstraintType
13937 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
13938   if (Constraint.size() == 1) {
13939     switch (Constraint[0]) {
13940     default: break;
13941     case 'b':
13942     case 'r':
13943     case 'f':
13944     case 'd':
13945     case 'v':
13946     case 'y':
13947       return C_RegisterClass;
13948     case 'Z':
13949       // FIXME: While Z does indicate a memory constraint, it specifically
13950       // indicates an r+r address (used in conjunction with the 'y' modifier
13951       // in the replacement string). Currently, we're forcing the base
13952       // register to be r0 in the asm printer (which is interpreted as zero)
13953       // and forming the complete address in the second register. This is
13954       // suboptimal.
13955       return C_Memory;
13956     }
13957   } else if (Constraint == "wc") { // individual CR bits.
13958     return C_RegisterClass;
13959   } else if (Constraint == "wa" || Constraint == "wd" ||
13960              Constraint == "wf" || Constraint == "ws" ||
13961              Constraint == "wi") {
13962     return C_RegisterClass; // VSX registers.
13963   }
13964   return TargetLowering::getConstraintType(Constraint);
13965 }
13966 
13967 /// Examine constraint type and operand type and determine a weight value.
13968 /// This object must already have been set up with the operand type
13969 /// and the current alternative constraint selected.
13970 TargetLowering::ConstraintWeight
13971 PPCTargetLowering::getSingleConstraintMatchWeight(
13972     AsmOperandInfo &info, const char *constraint) const {
13973   ConstraintWeight weight = CW_Invalid;
13974   Value *CallOperandVal = info.CallOperandVal;
13975     // If we don't have a value, we can't do a match,
13976     // but allow it at the lowest weight.
13977   if (!CallOperandVal)
13978     return CW_Default;
13979   Type *type = CallOperandVal->getType();
13980 
13981   // Look at the constraint type.
13982   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
13983     return CW_Register; // an individual CR bit.
13984   else if ((StringRef(constraint) == "wa" ||
13985             StringRef(constraint) == "wd" ||
13986             StringRef(constraint) == "wf") &&
13987            type->isVectorTy())
13988     return CW_Register;
13989   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
13990     return CW_Register;
13991   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
13992     return CW_Register; // just hold 64-bit integers data.
13993 
13994   switch (*constraint) {
13995   default:
13996     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
13997     break;
13998   case 'b':
13999     if (type->isIntegerTy())
14000       weight = CW_Register;
14001     break;
14002   case 'f':
14003     if (type->isFloatTy())
14004       weight = CW_Register;
14005     break;
14006   case 'd':
14007     if (type->isDoubleTy())
14008       weight = CW_Register;
14009     break;
14010   case 'v':
14011     if (type->isVectorTy())
14012       weight = CW_Register;
14013     break;
14014   case 'y':
14015     weight = CW_Register;
14016     break;
14017   case 'Z':
14018     weight = CW_Memory;
14019     break;
14020   }
14021   return weight;
14022 }
14023 
14024 std::pair<unsigned, const TargetRegisterClass *>
14025 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
14026                                                 StringRef Constraint,
14027                                                 MVT VT) const {
14028   if (Constraint.size() == 1) {
14029     // GCC RS6000 Constraint Letters
14030     switch (Constraint[0]) {
14031     case 'b':   // R1-R31
14032       if (VT == MVT::i64 && Subtarget.isPPC64())
14033         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
14034       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
14035     case 'r':   // R0-R31
14036       if (VT == MVT::i64 && Subtarget.isPPC64())
14037         return std::make_pair(0U, &PPC::G8RCRegClass);
14038       return std::make_pair(0U, &PPC::GPRCRegClass);
14039     // 'd' and 'f' constraints are both defined to be "the floating point
14040     // registers", where one is for 32-bit and the other for 64-bit. We don't
14041     // really care overly much here so just give them all the same reg classes.
14042     case 'd':
14043     case 'f':
14044       if (Subtarget.hasSPE()) {
14045         if (VT == MVT::f32 || VT == MVT::i32)
14046           return std::make_pair(0U, &PPC::SPE4RCRegClass);
14047         if (VT == MVT::f64 || VT == MVT::i64)
14048           return std::make_pair(0U, &PPC::SPERCRegClass);
14049       } else {
14050         if (VT == MVT::f32 || VT == MVT::i32)
14051           return std::make_pair(0U, &PPC::F4RCRegClass);
14052         if (VT == MVT::f64 || VT == MVT::i64)
14053           return std::make_pair(0U, &PPC::F8RCRegClass);
14054         if (VT == MVT::v4f64 && Subtarget.hasQPX())
14055           return std::make_pair(0U, &PPC::QFRCRegClass);
14056         if (VT == MVT::v4f32 && Subtarget.hasQPX())
14057           return std::make_pair(0U, &PPC::QSRCRegClass);
14058       }
14059       break;
14060     case 'v':
14061       if (VT == MVT::v4f64 && Subtarget.hasQPX())
14062         return std::make_pair(0U, &PPC::QFRCRegClass);
14063       if (VT == MVT::v4f32 && Subtarget.hasQPX())
14064         return std::make_pair(0U, &PPC::QSRCRegClass);
14065       if (Subtarget.hasAltivec())
14066         return std::make_pair(0U, &PPC::VRRCRegClass);
14067       break;
14068     case 'y':   // crrc
14069       return std::make_pair(0U, &PPC::CRRCRegClass);
14070     }
14071   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
14072     // An individual CR bit.
14073     return std::make_pair(0U, &PPC::CRBITRCRegClass);
14074   } else if ((Constraint == "wa" || Constraint == "wd" ||
14075              Constraint == "wf" || Constraint == "wi") &&
14076              Subtarget.hasVSX()) {
14077     return std::make_pair(0U, &PPC::VSRCRegClass);
14078   } else if (Constraint == "ws" && Subtarget.hasVSX()) {
14079     if (VT == MVT::f32 && Subtarget.hasP8Vector())
14080       return std::make_pair(0U, &PPC::VSSRCRegClass);
14081     else
14082       return std::make_pair(0U, &PPC::VSFRCRegClass);
14083   }
14084 
14085   std::pair<unsigned, const TargetRegisterClass *> R =
14086       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
14087 
14088   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
14089   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
14090   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
14091   // register.
14092   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
14093   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
14094   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
14095       PPC::GPRCRegClass.contains(R.first))
14096     return std::make_pair(TRI->getMatchingSuperReg(R.first,
14097                             PPC::sub_32, &PPC::G8RCRegClass),
14098                           &PPC::G8RCRegClass);
14099 
14100   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
14101   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
14102     R.first = PPC::CR0;
14103     R.second = &PPC::CRRCRegClass;
14104   }
14105 
14106   return R;
14107 }
14108 
14109 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
14110 /// vector.  If it is invalid, don't add anything to Ops.
14111 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
14112                                                      std::string &Constraint,
14113                                                      std::vector<SDValue>&Ops,
14114                                                      SelectionDAG &DAG) const {
14115   SDValue Result;
14116 
14117   // Only support length 1 constraints.
14118   if (Constraint.length() > 1) return;
14119 
14120   char Letter = Constraint[0];
14121   switch (Letter) {
14122   default: break;
14123   case 'I':
14124   case 'J':
14125   case 'K':
14126   case 'L':
14127   case 'M':
14128   case 'N':
14129   case 'O':
14130   case 'P': {
14131     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
14132     if (!CST) return; // Must be an immediate to match.
14133     SDLoc dl(Op);
14134     int64_t Value = CST->getSExtValue();
14135     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
14136                          // numbers are printed as such.
14137     switch (Letter) {
14138     default: llvm_unreachable("Unknown constraint letter!");
14139     case 'I':  // "I" is a signed 16-bit constant.
14140       if (isInt<16>(Value))
14141         Result = DAG.getTargetConstant(Value, dl, TCVT);
14142       break;
14143     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
14144       if (isShiftedUInt<16, 16>(Value))
14145         Result = DAG.getTargetConstant(Value, dl, TCVT);
14146       break;
14147     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
14148       if (isShiftedInt<16, 16>(Value))
14149         Result = DAG.getTargetConstant(Value, dl, TCVT);
14150       break;
14151     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
14152       if (isUInt<16>(Value))
14153         Result = DAG.getTargetConstant(Value, dl, TCVT);
14154       break;
14155     case 'M':  // "M" is a constant that is greater than 31.
14156       if (Value > 31)
14157         Result = DAG.getTargetConstant(Value, dl, TCVT);
14158       break;
14159     case 'N':  // "N" is a positive constant that is an exact power of two.
14160       if (Value > 0 && isPowerOf2_64(Value))
14161         Result = DAG.getTargetConstant(Value, dl, TCVT);
14162       break;
14163     case 'O':  // "O" is the constant zero.
14164       if (Value == 0)
14165         Result = DAG.getTargetConstant(Value, dl, TCVT);
14166       break;
14167     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
14168       if (isInt<16>(-Value))
14169         Result = DAG.getTargetConstant(Value, dl, TCVT);
14170       break;
14171     }
14172     break;
14173   }
14174   }
14175 
14176   if (Result.getNode()) {
14177     Ops.push_back(Result);
14178     return;
14179   }
14180 
14181   // Handle standard constraint letters.
14182   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
14183 }
14184 
14185 // isLegalAddressingMode - Return true if the addressing mode represented
14186 // by AM is legal for this target, for a load/store of the specified type.
14187 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
14188                                               const AddrMode &AM, Type *Ty,
14189                                               unsigned AS, Instruction *I) const {
14190   // PPC does not allow r+i addressing modes for vectors!
14191   if (Ty->isVectorTy() && AM.BaseOffs != 0)
14192     return false;
14193 
14194   // PPC allows a sign-extended 16-bit immediate field.
14195   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
14196     return false;
14197 
14198   // No global is ever allowed as a base.
14199   if (AM.BaseGV)
14200     return false;
14201 
14202   // PPC only support r+r,
14203   switch (AM.Scale) {
14204   case 0:  // "r+i" or just "i", depending on HasBaseReg.
14205     break;
14206   case 1:
14207     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
14208       return false;
14209     // Otherwise we have r+r or r+i.
14210     break;
14211   case 2:
14212     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
14213       return false;
14214     // Allow 2*r as r+r.
14215     break;
14216   default:
14217     // No other scales are supported.
14218     return false;
14219   }
14220 
14221   return true;
14222 }
14223 
14224 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
14225                                            SelectionDAG &DAG) const {
14226   MachineFunction &MF = DAG.getMachineFunction();
14227   MachineFrameInfo &MFI = MF.getFrameInfo();
14228   MFI.setReturnAddressIsTaken(true);
14229 
14230   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
14231     return SDValue();
14232 
14233   SDLoc dl(Op);
14234   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
14235 
14236   // Make sure the function does not optimize away the store of the RA to
14237   // the stack.
14238   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
14239   FuncInfo->setLRStoreRequired();
14240   bool isPPC64 = Subtarget.isPPC64();
14241   auto PtrVT = getPointerTy(MF.getDataLayout());
14242 
14243   if (Depth > 0) {
14244     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
14245     SDValue Offset =
14246         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
14247                         isPPC64 ? MVT::i64 : MVT::i32);
14248     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
14249                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
14250                        MachinePointerInfo());
14251   }
14252 
14253   // Just load the return address off the stack.
14254   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
14255   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
14256                      MachinePointerInfo());
14257 }
14258 
14259 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
14260                                           SelectionDAG &DAG) const {
14261   SDLoc dl(Op);
14262   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
14263 
14264   MachineFunction &MF = DAG.getMachineFunction();
14265   MachineFrameInfo &MFI = MF.getFrameInfo();
14266   MFI.setFrameAddressIsTaken(true);
14267 
14268   EVT PtrVT = getPointerTy(MF.getDataLayout());
14269   bool isPPC64 = PtrVT == MVT::i64;
14270 
14271   // Naked functions never have a frame pointer, and so we use r1. For all
14272   // other functions, this decision must be delayed until during PEI.
14273   unsigned FrameReg;
14274   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
14275     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
14276   else
14277     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
14278 
14279   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
14280                                          PtrVT);
14281   while (Depth--)
14282     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
14283                             FrameAddr, MachinePointerInfo());
14284   return FrameAddr;
14285 }
14286 
14287 // FIXME? Maybe this could be a TableGen attribute on some registers and
14288 // this table could be generated automatically from RegInfo.
14289 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
14290                                               SelectionDAG &DAG) const {
14291   bool isPPC64 = Subtarget.isPPC64();
14292   bool isDarwinABI = Subtarget.isDarwinABI();
14293 
14294   if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
14295       (!isPPC64 && VT != MVT::i32))
14296     report_fatal_error("Invalid register global variable type");
14297 
14298   bool is64Bit = isPPC64 && VT == MVT::i64;
14299   unsigned Reg = StringSwitch<unsigned>(RegName)
14300                    .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
14301                    .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
14302                    .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
14303                                   (is64Bit ? PPC::X13 : PPC::R13))
14304                    .Default(0);
14305 
14306   if (Reg)
14307     return Reg;
14308   report_fatal_error("Invalid register name global variable");
14309 }
14310 
14311 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
14312   // 32-bit SVR4 ABI access everything as got-indirect.
14313   if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
14314     return true;
14315 
14316   CodeModel::Model CModel = getTargetMachine().getCodeModel();
14317   // If it is small or large code model, module locals are accessed
14318   // indirectly by loading their address from .toc/.got. The difference
14319   // is that for large code model we have ADDISTocHa + LDtocL and for
14320   // small code model we simply have LDtoc.
14321   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
14322     return true;
14323 
14324   // JumpTable and BlockAddress are accessed as got-indirect.
14325   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
14326     return true;
14327 
14328   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
14329     const GlobalValue *GV = G->getGlobal();
14330     unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
14331     // The NLP flag indicates that a global access has to use an
14332     // extra indirection.
14333     if (GVFlags & PPCII::MO_NLP_FLAG)
14334       return true;
14335   }
14336 
14337   return false;
14338 }
14339 
14340 bool
14341 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
14342   // The PowerPC target isn't yet aware of offsets.
14343   return false;
14344 }
14345 
14346 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
14347                                            const CallInst &I,
14348                                            MachineFunction &MF,
14349                                            unsigned Intrinsic) const {
14350   switch (Intrinsic) {
14351   case Intrinsic::ppc_qpx_qvlfd:
14352   case Intrinsic::ppc_qpx_qvlfs:
14353   case Intrinsic::ppc_qpx_qvlfcd:
14354   case Intrinsic::ppc_qpx_qvlfcs:
14355   case Intrinsic::ppc_qpx_qvlfiwa:
14356   case Intrinsic::ppc_qpx_qvlfiwz:
14357   case Intrinsic::ppc_altivec_lvx:
14358   case Intrinsic::ppc_altivec_lvxl:
14359   case Intrinsic::ppc_altivec_lvebx:
14360   case Intrinsic::ppc_altivec_lvehx:
14361   case Intrinsic::ppc_altivec_lvewx:
14362   case Intrinsic::ppc_vsx_lxvd2x:
14363   case Intrinsic::ppc_vsx_lxvw4x: {
14364     EVT VT;
14365     switch (Intrinsic) {
14366     case Intrinsic::ppc_altivec_lvebx:
14367       VT = MVT::i8;
14368       break;
14369     case Intrinsic::ppc_altivec_lvehx:
14370       VT = MVT::i16;
14371       break;
14372     case Intrinsic::ppc_altivec_lvewx:
14373       VT = MVT::i32;
14374       break;
14375     case Intrinsic::ppc_vsx_lxvd2x:
14376       VT = MVT::v2f64;
14377       break;
14378     case Intrinsic::ppc_qpx_qvlfd:
14379       VT = MVT::v4f64;
14380       break;
14381     case Intrinsic::ppc_qpx_qvlfs:
14382       VT = MVT::v4f32;
14383       break;
14384     case Intrinsic::ppc_qpx_qvlfcd:
14385       VT = MVT::v2f64;
14386       break;
14387     case Intrinsic::ppc_qpx_qvlfcs:
14388       VT = MVT::v2f32;
14389       break;
14390     default:
14391       VT = MVT::v4i32;
14392       break;
14393     }
14394 
14395     Info.opc = ISD::INTRINSIC_W_CHAIN;
14396     Info.memVT = VT;
14397     Info.ptrVal = I.getArgOperand(0);
14398     Info.offset = -VT.getStoreSize()+1;
14399     Info.size = 2*VT.getStoreSize()-1;
14400     Info.align = 1;
14401     Info.flags = MachineMemOperand::MOLoad;
14402     return true;
14403   }
14404   case Intrinsic::ppc_qpx_qvlfda:
14405   case Intrinsic::ppc_qpx_qvlfsa:
14406   case Intrinsic::ppc_qpx_qvlfcda:
14407   case Intrinsic::ppc_qpx_qvlfcsa:
14408   case Intrinsic::ppc_qpx_qvlfiwaa:
14409   case Intrinsic::ppc_qpx_qvlfiwza: {
14410     EVT VT;
14411     switch (Intrinsic) {
14412     case Intrinsic::ppc_qpx_qvlfda:
14413       VT = MVT::v4f64;
14414       break;
14415     case Intrinsic::ppc_qpx_qvlfsa:
14416       VT = MVT::v4f32;
14417       break;
14418     case Intrinsic::ppc_qpx_qvlfcda:
14419       VT = MVT::v2f64;
14420       break;
14421     case Intrinsic::ppc_qpx_qvlfcsa:
14422       VT = MVT::v2f32;
14423       break;
14424     default:
14425       VT = MVT::v4i32;
14426       break;
14427     }
14428 
14429     Info.opc = ISD::INTRINSIC_W_CHAIN;
14430     Info.memVT = VT;
14431     Info.ptrVal = I.getArgOperand(0);
14432     Info.offset = 0;
14433     Info.size = VT.getStoreSize();
14434     Info.align = 1;
14435     Info.flags = MachineMemOperand::MOLoad;
14436     return true;
14437   }
14438   case Intrinsic::ppc_qpx_qvstfd:
14439   case Intrinsic::ppc_qpx_qvstfs:
14440   case Intrinsic::ppc_qpx_qvstfcd:
14441   case Intrinsic::ppc_qpx_qvstfcs:
14442   case Intrinsic::ppc_qpx_qvstfiw:
14443   case Intrinsic::ppc_altivec_stvx:
14444   case Intrinsic::ppc_altivec_stvxl:
14445   case Intrinsic::ppc_altivec_stvebx:
14446   case Intrinsic::ppc_altivec_stvehx:
14447   case Intrinsic::ppc_altivec_stvewx:
14448   case Intrinsic::ppc_vsx_stxvd2x:
14449   case Intrinsic::ppc_vsx_stxvw4x: {
14450     EVT VT;
14451     switch (Intrinsic) {
14452     case Intrinsic::ppc_altivec_stvebx:
14453       VT = MVT::i8;
14454       break;
14455     case Intrinsic::ppc_altivec_stvehx:
14456       VT = MVT::i16;
14457       break;
14458     case Intrinsic::ppc_altivec_stvewx:
14459       VT = MVT::i32;
14460       break;
14461     case Intrinsic::ppc_vsx_stxvd2x:
14462       VT = MVT::v2f64;
14463       break;
14464     case Intrinsic::ppc_qpx_qvstfd:
14465       VT = MVT::v4f64;
14466       break;
14467     case Intrinsic::ppc_qpx_qvstfs:
14468       VT = MVT::v4f32;
14469       break;
14470     case Intrinsic::ppc_qpx_qvstfcd:
14471       VT = MVT::v2f64;
14472       break;
14473     case Intrinsic::ppc_qpx_qvstfcs:
14474       VT = MVT::v2f32;
14475       break;
14476     default:
14477       VT = MVT::v4i32;
14478       break;
14479     }
14480 
14481     Info.opc = ISD::INTRINSIC_VOID;
14482     Info.memVT = VT;
14483     Info.ptrVal = I.getArgOperand(1);
14484     Info.offset = -VT.getStoreSize()+1;
14485     Info.size = 2*VT.getStoreSize()-1;
14486     Info.align = 1;
14487     Info.flags = MachineMemOperand::MOStore;
14488     return true;
14489   }
14490   case Intrinsic::ppc_qpx_qvstfda:
14491   case Intrinsic::ppc_qpx_qvstfsa:
14492   case Intrinsic::ppc_qpx_qvstfcda:
14493   case Intrinsic::ppc_qpx_qvstfcsa:
14494   case Intrinsic::ppc_qpx_qvstfiwa: {
14495     EVT VT;
14496     switch (Intrinsic) {
14497     case Intrinsic::ppc_qpx_qvstfda:
14498       VT = MVT::v4f64;
14499       break;
14500     case Intrinsic::ppc_qpx_qvstfsa:
14501       VT = MVT::v4f32;
14502       break;
14503     case Intrinsic::ppc_qpx_qvstfcda:
14504       VT = MVT::v2f64;
14505       break;
14506     case Intrinsic::ppc_qpx_qvstfcsa:
14507       VT = MVT::v2f32;
14508       break;
14509     default:
14510       VT = MVT::v4i32;
14511       break;
14512     }
14513 
14514     Info.opc = ISD::INTRINSIC_VOID;
14515     Info.memVT = VT;
14516     Info.ptrVal = I.getArgOperand(1);
14517     Info.offset = 0;
14518     Info.size = VT.getStoreSize();
14519     Info.align = 1;
14520     Info.flags = MachineMemOperand::MOStore;
14521     return true;
14522   }
14523   default:
14524     break;
14525   }
14526 
14527   return false;
14528 }
14529 
14530 /// getOptimalMemOpType - Returns the target specific optimal type for load
14531 /// and store operations as a result of memset, memcpy, and memmove
14532 /// lowering. If DstAlign is zero that means it's safe to destination
14533 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
14534 /// means there isn't a need to check it against alignment requirement,
14535 /// probably because the source does not need to be loaded. If 'IsMemset' is
14536 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
14537 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
14538 /// source is constant so it does not need to be loaded.
14539 /// It returns EVT::Other if the type should be determined using generic
14540 /// target-independent logic.
14541 EVT PPCTargetLowering::getOptimalMemOpType(
14542     uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
14543     bool ZeroMemset, bool MemcpyStrSrc,
14544     const AttributeList &FuncAttributes) const {
14545   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
14546     // When expanding a memset, require at least two QPX instructions to cover
14547     // the cost of loading the value to be stored from the constant pool.
14548     if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
14549        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
14550         !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
14551       return MVT::v4f64;
14552     }
14553 
14554     // We should use Altivec/VSX loads and stores when available. For unaligned
14555     // addresses, unaligned VSX loads are only fast starting with the P8.
14556     if (Subtarget.hasAltivec() && Size >= 16 &&
14557         (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
14558          ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
14559       return MVT::v4i32;
14560   }
14561 
14562   if (Subtarget.isPPC64()) {
14563     return MVT::i64;
14564   }
14565 
14566   return MVT::i32;
14567 }
14568 
14569 /// Returns true if it is beneficial to convert a load of a constant
14570 /// to just the constant itself.
14571 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
14572                                                           Type *Ty) const {
14573   assert(Ty->isIntegerTy());
14574 
14575   unsigned BitSize = Ty->getPrimitiveSizeInBits();
14576   return !(BitSize == 0 || BitSize > 64);
14577 }
14578 
14579 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
14580   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
14581     return false;
14582   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
14583   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
14584   return NumBits1 == 64 && NumBits2 == 32;
14585 }
14586 
14587 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
14588   if (!VT1.isInteger() || !VT2.isInteger())
14589     return false;
14590   unsigned NumBits1 = VT1.getSizeInBits();
14591   unsigned NumBits2 = VT2.getSizeInBits();
14592   return NumBits1 == 64 && NumBits2 == 32;
14593 }
14594 
14595 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
14596   // Generally speaking, zexts are not free, but they are free when they can be
14597   // folded with other operations.
14598   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
14599     EVT MemVT = LD->getMemoryVT();
14600     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
14601          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
14602         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
14603          LD->getExtensionType() == ISD::ZEXTLOAD))
14604       return true;
14605   }
14606 
14607   // FIXME: Add other cases...
14608   //  - 32-bit shifts with a zext to i64
14609   //  - zext after ctlz, bswap, etc.
14610   //  - zext after and by a constant mask
14611 
14612   return TargetLowering::isZExtFree(Val, VT2);
14613 }
14614 
14615 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
14616   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
14617          "invalid fpext types");
14618   // Extending to float128 is not free.
14619   if (DestVT == MVT::f128)
14620     return false;
14621   return true;
14622 }
14623 
14624 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
14625   return isInt<16>(Imm) || isUInt<16>(Imm);
14626 }
14627 
14628 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
14629   return isInt<16>(Imm) || isUInt<16>(Imm);
14630 }
14631 
14632 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
14633                                                        unsigned,
14634                                                        unsigned,
14635                                                        MachineMemOperand::Flags,
14636                                                        bool *Fast) const {
14637   if (DisablePPCUnaligned)
14638     return false;
14639 
14640   // PowerPC supports unaligned memory access for simple non-vector types.
14641   // Although accessing unaligned addresses is not as efficient as accessing
14642   // aligned addresses, it is generally more efficient than manual expansion,
14643   // and generally only traps for software emulation when crossing page
14644   // boundaries.
14645 
14646   if (!VT.isSimple())
14647     return false;
14648 
14649   if (VT.getSimpleVT().isVector()) {
14650     if (Subtarget.hasVSX()) {
14651       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
14652           VT != MVT::v4f32 && VT != MVT::v4i32)
14653         return false;
14654     } else {
14655       return false;
14656     }
14657   }
14658 
14659   if (VT == MVT::ppcf128)
14660     return false;
14661 
14662   if (Fast)
14663     *Fast = true;
14664 
14665   return true;
14666 }
14667 
14668 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
14669   VT = VT.getScalarType();
14670 
14671   if (!VT.isSimple())
14672     return false;
14673 
14674   switch (VT.getSimpleVT().SimpleTy) {
14675   case MVT::f32:
14676   case MVT::f64:
14677     return true;
14678   case MVT::f128:
14679     return (EnableQuadPrecision && Subtarget.hasP9Vector());
14680   default:
14681     break;
14682   }
14683 
14684   return false;
14685 }
14686 
14687 const MCPhysReg *
14688 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
14689   // LR is a callee-save register, but we must treat it as clobbered by any call
14690   // site. Hence we include LR in the scratch registers, which are in turn added
14691   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
14692   // to CTR, which is used by any indirect call.
14693   static const MCPhysReg ScratchRegs[] = {
14694     PPC::X12, PPC::LR8, PPC::CTR8, 0
14695   };
14696 
14697   return ScratchRegs;
14698 }
14699 
14700 unsigned PPCTargetLowering::getExceptionPointerRegister(
14701     const Constant *PersonalityFn) const {
14702   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
14703 }
14704 
14705 unsigned PPCTargetLowering::getExceptionSelectorRegister(
14706     const Constant *PersonalityFn) const {
14707   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
14708 }
14709 
14710 bool
14711 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
14712                      EVT VT , unsigned DefinedValues) const {
14713   if (VT == MVT::v2i64)
14714     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
14715 
14716   if (Subtarget.hasVSX() || Subtarget.hasQPX())
14717     return true;
14718 
14719   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
14720 }
14721 
14722 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
14723   if (DisableILPPref || Subtarget.enableMachineScheduler())
14724     return TargetLowering::getSchedulingPreference(N);
14725 
14726   return Sched::ILP;
14727 }
14728 
14729 // Create a fast isel object.
14730 FastISel *
14731 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
14732                                   const TargetLibraryInfo *LibInfo) const {
14733   return PPC::createFastISel(FuncInfo, LibInfo);
14734 }
14735 
14736 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
14737   if (Subtarget.isDarwinABI()) return;
14738   if (!Subtarget.isPPC64()) return;
14739 
14740   // Update IsSplitCSR in PPCFunctionInfo
14741   PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
14742   PFI->setIsSplitCSR(true);
14743 }
14744 
14745 void PPCTargetLowering::insertCopiesSplitCSR(
14746   MachineBasicBlock *Entry,
14747   const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
14748   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
14749   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
14750   if (!IStart)
14751     return;
14752 
14753   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
14754   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
14755   MachineBasicBlock::iterator MBBI = Entry->begin();
14756   for (const MCPhysReg *I = IStart; *I; ++I) {
14757     const TargetRegisterClass *RC = nullptr;
14758     if (PPC::G8RCRegClass.contains(*I))
14759       RC = &PPC::G8RCRegClass;
14760     else if (PPC::F8RCRegClass.contains(*I))
14761       RC = &PPC::F8RCRegClass;
14762     else if (PPC::CRRCRegClass.contains(*I))
14763       RC = &PPC::CRRCRegClass;
14764     else if (PPC::VRRCRegClass.contains(*I))
14765       RC = &PPC::VRRCRegClass;
14766     else
14767       llvm_unreachable("Unexpected register class in CSRsViaCopy!");
14768 
14769     unsigned NewVR = MRI->createVirtualRegister(RC);
14770     // Create copy from CSR to a virtual register.
14771     // FIXME: this currently does not emit CFI pseudo-instructions, it works
14772     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
14773     // nounwind. If we want to generalize this later, we may need to emit
14774     // CFI pseudo-instructions.
14775     assert(Entry->getParent()->getFunction().hasFnAttribute(
14776              Attribute::NoUnwind) &&
14777            "Function should be nounwind in insertCopiesSplitCSR!");
14778     Entry->addLiveIn(*I);
14779     BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
14780       .addReg(*I);
14781 
14782     // Insert the copy-back instructions right before the terminator.
14783     for (auto *Exit : Exits)
14784       BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
14785               TII->get(TargetOpcode::COPY), *I)
14786         .addReg(NewVR);
14787   }
14788 }
14789 
14790 // Override to enable LOAD_STACK_GUARD lowering on Linux.
14791 bool PPCTargetLowering::useLoadStackGuardNode() const {
14792   if (!Subtarget.isTargetLinux())
14793     return TargetLowering::useLoadStackGuardNode();
14794   return true;
14795 }
14796 
14797 // Override to disable global variable loading on Linux.
14798 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
14799   if (!Subtarget.isTargetLinux())
14800     return TargetLowering::insertSSPDeclarations(M);
14801 }
14802 
14803 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
14804                                      bool ForCodeSize) const {
14805   if (!VT.isSimple() || !Subtarget.hasVSX())
14806     return false;
14807 
14808   switch(VT.getSimpleVT().SimpleTy) {
14809   default:
14810     // For FP types that are currently not supported by PPC backend, return
14811     // false. Examples: f16, f80.
14812     return false;
14813   case MVT::f32:
14814   case MVT::f64:
14815   case MVT::ppcf128:
14816     return Imm.isPosZero();
14817   }
14818 }
14819 
14820 // For vector shift operation op, fold
14821 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
14822 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
14823                                   SelectionDAG &DAG) {
14824   SDValue N0 = N->getOperand(0);
14825   SDValue N1 = N->getOperand(1);
14826   EVT VT = N0.getValueType();
14827   unsigned OpSizeInBits = VT.getScalarSizeInBits();
14828   unsigned Opcode = N->getOpcode();
14829   unsigned TargetOpcode;
14830 
14831   switch (Opcode) {
14832   default:
14833     llvm_unreachable("Unexpected shift operation");
14834   case ISD::SHL:
14835     TargetOpcode = PPCISD::SHL;
14836     break;
14837   case ISD::SRL:
14838     TargetOpcode = PPCISD::SRL;
14839     break;
14840   case ISD::SRA:
14841     TargetOpcode = PPCISD::SRA;
14842     break;
14843   }
14844 
14845   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
14846       N1->getOpcode() == ISD::AND)
14847     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
14848       if (Mask->getZExtValue() == OpSizeInBits - 1)
14849         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
14850 
14851   return SDValue();
14852 }
14853 
14854 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
14855   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14856     return Value;
14857 
14858   SDValue N0 = N->getOperand(0);
14859   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
14860   if (!Subtarget.isISA3_0() ||
14861       N0.getOpcode() != ISD::SIGN_EXTEND ||
14862       N0.getOperand(0).getValueType() != MVT::i32 ||
14863       CN1 == nullptr || N->getValueType(0) != MVT::i64)
14864     return SDValue();
14865 
14866   // We can't save an operation here if the value is already extended, and
14867   // the existing shift is easier to combine.
14868   SDValue ExtsSrc = N0.getOperand(0);
14869   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
14870       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
14871     return SDValue();
14872 
14873   SDLoc DL(N0);
14874   SDValue ShiftBy = SDValue(CN1, 0);
14875   // We want the shift amount to be i32 on the extswli, but the shift could
14876   // have an i64.
14877   if (ShiftBy.getValueType() == MVT::i64)
14878     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
14879 
14880   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
14881                          ShiftBy);
14882 }
14883 
14884 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
14885   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14886     return Value;
14887 
14888   return SDValue();
14889 }
14890 
14891 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
14892   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14893     return Value;
14894 
14895   return SDValue();
14896 }
14897 
14898 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
14899 // Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
14900 // When C is zero, the equation (addi Z, -C) can be simplified to Z
14901 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
14902 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
14903                                  const PPCSubtarget &Subtarget) {
14904   if (!Subtarget.isPPC64())
14905     return SDValue();
14906 
14907   SDValue LHS = N->getOperand(0);
14908   SDValue RHS = N->getOperand(1);
14909 
14910   auto isZextOfCompareWithConstant = [](SDValue Op) {
14911     if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
14912         Op.getValueType() != MVT::i64)
14913       return false;
14914 
14915     SDValue Cmp = Op.getOperand(0);
14916     if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
14917         Cmp.getOperand(0).getValueType() != MVT::i64)
14918       return false;
14919 
14920     if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
14921       int64_t NegConstant = 0 - Constant->getSExtValue();
14922       // Due to the limitations of the addi instruction,
14923       // -C is required to be [-32768, 32767].
14924       return isInt<16>(NegConstant);
14925     }
14926 
14927     return false;
14928   };
14929 
14930   bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
14931   bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
14932 
14933   // If there is a pattern, canonicalize a zext operand to the RHS.
14934   if (LHSHasPattern && !RHSHasPattern)
14935     std::swap(LHS, RHS);
14936   else if (!LHSHasPattern && !RHSHasPattern)
14937     return SDValue();
14938 
14939   SDLoc DL(N);
14940   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
14941   SDValue Cmp = RHS.getOperand(0);
14942   SDValue Z = Cmp.getOperand(0);
14943   auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
14944 
14945   assert(Constant && "Constant Should not be a null pointer.");
14946   int64_t NegConstant = 0 - Constant->getSExtValue();
14947 
14948   switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
14949   default: break;
14950   case ISD::SETNE: {
14951     //                                 when C == 0
14952     //                             --> addze X, (addic Z, -1).carry
14953     //                            /
14954     // add X, (zext(setne Z, C))--
14955     //                            \    when -32768 <= -C <= 32767 && C != 0
14956     //                             --> addze X, (addic (addi Z, -C), -1).carry
14957     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14958                               DAG.getConstant(NegConstant, DL, MVT::i64));
14959     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14960     SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14961                                AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
14962     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14963                        SDValue(Addc.getNode(), 1));
14964     }
14965   case ISD::SETEQ: {
14966     //                                 when C == 0
14967     //                             --> addze X, (subfic Z, 0).carry
14968     //                            /
14969     // add X, (zext(sete  Z, C))--
14970     //                            \    when -32768 <= -C <= 32767 && C != 0
14971     //                             --> addze X, (subfic (addi Z, -C), 0).carry
14972     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14973                               DAG.getConstant(NegConstant, DL, MVT::i64));
14974     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14975     SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14976                                DAG.getConstant(0, DL, MVT::i64), AddOrZ);
14977     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14978                        SDValue(Subc.getNode(), 1));
14979     }
14980   }
14981 
14982   return SDValue();
14983 }
14984 
14985 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
14986   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
14987     return Value;
14988 
14989   return SDValue();
14990 }
14991 
14992 // Detect TRUNCATE operations on bitcasts of float128 values.
14993 // What we are looking for here is the situtation where we extract a subset
14994 // of bits from a 128 bit float.
14995 // This can be of two forms:
14996 // 1) BITCAST of f128 feeding TRUNCATE
14997 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
14998 // The reason this is required is because we do not have a legal i128 type
14999 // and so we want to prevent having to store the f128 and then reload part
15000 // of it.
15001 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
15002                                            DAGCombinerInfo &DCI) const {
15003   // If we are using CRBits then try that first.
15004   if (Subtarget.useCRBits()) {
15005     // Check if CRBits did anything and return that if it did.
15006     if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
15007       return CRTruncValue;
15008   }
15009 
15010   SDLoc dl(N);
15011   SDValue Op0 = N->getOperand(0);
15012 
15013   // Looking for a truncate of i128 to i64.
15014   if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
15015     return SDValue();
15016 
15017   int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
15018 
15019   // SRL feeding TRUNCATE.
15020   if (Op0.getOpcode() == ISD::SRL) {
15021     ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
15022     // The right shift has to be by 64 bits.
15023     if (!ConstNode || ConstNode->getZExtValue() != 64)
15024       return SDValue();
15025 
15026     // Switch the element number to extract.
15027     EltToExtract = EltToExtract ? 0 : 1;
15028     // Update Op0 past the SRL.
15029     Op0 = Op0.getOperand(0);
15030   }
15031 
15032   // BITCAST feeding a TRUNCATE possibly via SRL.
15033   if (Op0.getOpcode() == ISD::BITCAST &&
15034       Op0.getValueType() == MVT::i128 &&
15035       Op0.getOperand(0).getValueType() == MVT::f128) {
15036     SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
15037     return DCI.DAG.getNode(
15038         ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
15039         DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
15040   }
15041   return SDValue();
15042 }
15043 
15044 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
15045   SelectionDAG &DAG = DCI.DAG;
15046 
15047   ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
15048   if (!ConstOpOrElement)
15049     return SDValue();
15050 
15051   // An imul is usually smaller than the alternative sequence for legal type.
15052   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
15053       isOperationLegal(ISD::MUL, N->getValueType(0)))
15054     return SDValue();
15055 
15056   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
15057     switch (this->Subtarget.getDarwinDirective()) {
15058     default:
15059       // TODO: enhance the condition for subtarget before pwr8
15060       return false;
15061     case PPC::DIR_PWR8:
15062       //  type        mul     add    shl
15063       // scalar        4       1      1
15064       // vector        7       2      2
15065       return true;
15066     case PPC::DIR_PWR9:
15067       //  type        mul     add    shl
15068       // scalar        5       2      2
15069       // vector        7       2      2
15070 
15071       // The cycle RATIO of related operations are showed as a table above.
15072       // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
15073       // scalar and vector type. For 2 instrs patterns, add/sub + shl
15074       // are 4, it is always profitable; but for 3 instrs patterns
15075       // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
15076       // So we should only do it for vector type.
15077       return IsAddOne && IsNeg ? VT.isVector() : true;
15078     }
15079   };
15080 
15081   EVT VT = N->getValueType(0);
15082   SDLoc DL(N);
15083 
15084   const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
15085   bool IsNeg = MulAmt.isNegative();
15086   APInt MulAmtAbs = MulAmt.abs();
15087 
15088   if ((MulAmtAbs - 1).isPowerOf2()) {
15089     // (mul x, 2^N + 1) => (add (shl x, N), x)
15090     // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
15091 
15092     if (!IsProfitable(IsNeg, true, VT))
15093       return SDValue();
15094 
15095     SDValue Op0 = N->getOperand(0);
15096     SDValue Op1 =
15097         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15098                     DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
15099     SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
15100 
15101     if (!IsNeg)
15102       return Res;
15103 
15104     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
15105   } else if ((MulAmtAbs + 1).isPowerOf2()) {
15106     // (mul x, 2^N - 1) => (sub (shl x, N), x)
15107     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
15108 
15109     if (!IsProfitable(IsNeg, false, VT))
15110       return SDValue();
15111 
15112     SDValue Op0 = N->getOperand(0);
15113     SDValue Op1 =
15114         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15115                     DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
15116 
15117     if (!IsNeg)
15118       return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
15119     else
15120       return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
15121 
15122   } else {
15123     return SDValue();
15124   }
15125 }
15126 
15127 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
15128   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
15129   if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
15130     return false;
15131 
15132   // If not a tail call then no need to proceed.
15133   if (!CI->isTailCall())
15134     return false;
15135 
15136   // If tail calls are disabled for the caller then we are done.
15137   const Function *Caller = CI->getParent()->getParent();
15138   auto Attr = Caller->getFnAttribute("disable-tail-calls");
15139   if (Attr.getValueAsString() == "true")
15140     return false;
15141 
15142   // If sibling calls have been disabled and tail-calls aren't guaranteed
15143   // there is no reason to duplicate.
15144   auto &TM = getTargetMachine();
15145   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
15146     return false;
15147 
15148   // Can't tail call a function called indirectly, or if it has variadic args.
15149   const Function *Callee = CI->getCalledFunction();
15150   if (!Callee || Callee->isVarArg())
15151     return false;
15152 
15153   // Make sure the callee and caller calling conventions are eligible for tco.
15154   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
15155                                            CI->getCallingConv()))
15156       return false;
15157 
15158   // If the function is local then we have a good chance at tail-calling it
15159   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
15160 }
15161 
15162 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
15163   if (!Subtarget.hasVSX())
15164     return false;
15165   if (Subtarget.hasP9Vector() && VT == MVT::f128)
15166     return true;
15167   return VT == MVT::f32 || VT == MVT::f64 ||
15168     VT == MVT::v4f32 || VT == MVT::v2f64;
15169 }
15170 
15171 bool PPCTargetLowering::
15172 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
15173   const Value *Mask = AndI.getOperand(1);
15174   // If the mask is suitable for andi. or andis. we should sink the and.
15175   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
15176     // Can't handle constants wider than 64-bits.
15177     if (CI->getBitWidth() > 64)
15178       return false;
15179     int64_t ConstVal = CI->getZExtValue();
15180     return isUInt<16>(ConstVal) ||
15181       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
15182   }
15183 
15184   // For non-constant masks, we can always use the record-form and.
15185   return true;
15186 }
15187 
15188 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
15189 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
15190 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
15191 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
15192 // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
15193 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
15194   assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
15195   assert(Subtarget.hasP9Altivec() &&
15196          "Only combine this when P9 altivec supported!");
15197   EVT VT = N->getValueType(0);
15198   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
15199     return SDValue();
15200 
15201   SelectionDAG &DAG = DCI.DAG;
15202   SDLoc dl(N);
15203   if (N->getOperand(0).getOpcode() == ISD::SUB) {
15204     // Even for signed integers, if it's known to be positive (as signed
15205     // integer) due to zero-extended inputs.
15206     unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
15207     unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
15208     if ((SubOpcd0 == ISD::ZERO_EXTEND ||
15209          SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
15210         (SubOpcd1 == ISD::ZERO_EXTEND ||
15211          SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
15212       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
15213                          N->getOperand(0)->getOperand(0),
15214                          N->getOperand(0)->getOperand(1),
15215                          DAG.getTargetConstant(0, dl, MVT::i32));
15216     }
15217 
15218     // For type v4i32, it can be optimized with xvnegsp + vabsduw
15219     if (N->getOperand(0).getValueType() == MVT::v4i32 &&
15220         N->getOperand(0).hasOneUse()) {
15221       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
15222                          N->getOperand(0)->getOperand(0),
15223                          N->getOperand(0)->getOperand(1),
15224                          DAG.getTargetConstant(1, dl, MVT::i32));
15225     }
15226   }
15227 
15228   return SDValue();
15229 }
15230 
15231 // For type v4i32/v8ii16/v16i8, transform
15232 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
15233 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
15234 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
15235 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
15236 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
15237                                           DAGCombinerInfo &DCI) const {
15238   assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
15239   assert(Subtarget.hasP9Altivec() &&
15240          "Only combine this when P9 altivec supported!");
15241 
15242   SelectionDAG &DAG = DCI.DAG;
15243   SDLoc dl(N);
15244   SDValue Cond = N->getOperand(0);
15245   SDValue TrueOpnd = N->getOperand(1);
15246   SDValue FalseOpnd = N->getOperand(2);
15247   EVT VT = N->getOperand(1).getValueType();
15248 
15249   if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
15250       FalseOpnd.getOpcode() != ISD::SUB)
15251     return SDValue();
15252 
15253   // ABSD only available for type v4i32/v8i16/v16i8
15254   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
15255     return SDValue();
15256 
15257   // At least to save one more dependent computation
15258   if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
15259     return SDValue();
15260 
15261   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15262 
15263   // Can only handle unsigned comparison here
15264   switch (CC) {
15265   default:
15266     return SDValue();
15267   case ISD::SETUGT:
15268   case ISD::SETUGE:
15269     break;
15270   case ISD::SETULT:
15271   case ISD::SETULE:
15272     std::swap(TrueOpnd, FalseOpnd);
15273     break;
15274   }
15275 
15276   SDValue CmpOpnd1 = Cond.getOperand(0);
15277   SDValue CmpOpnd2 = Cond.getOperand(1);
15278 
15279   // SETCC CmpOpnd1 CmpOpnd2 cond
15280   // TrueOpnd = CmpOpnd1 - CmpOpnd2
15281   // FalseOpnd = CmpOpnd2 - CmpOpnd1
15282   if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
15283       TrueOpnd.getOperand(1) == CmpOpnd2 &&
15284       FalseOpnd.getOperand(0) == CmpOpnd2 &&
15285       FalseOpnd.getOperand(1) == CmpOpnd1) {
15286     return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
15287                        CmpOpnd1, CmpOpnd2,
15288                        DAG.getTargetConstant(0, dl, MVT::i32));
15289   }
15290 
15291   return SDValue();
15292 }
15293