1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56 #include "llvm/CodeGen/TargetRegisterInfo.h"
57 #include "llvm/CodeGen/ValueTypes.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSymbolXCOFF.h"
78 #include "llvm/Support/AtomicOrdering.h"
79 #include "llvm/Support/BranchProbability.h"
80 #include "llvm/Support/Casting.h"
81 #include "llvm/Support/CodeGen.h"
82 #include "llvm/Support/CommandLine.h"
83 #include "llvm/Support/Compiler.h"
84 #include "llvm/Support/Debug.h"
85 #include "llvm/Support/ErrorHandling.h"
86 #include "llvm/Support/Format.h"
87 #include "llvm/Support/KnownBits.h"
88 #include "llvm/Support/MachineValueType.h"
89 #include "llvm/Support/MathExtras.h"
90 #include "llvm/Support/raw_ostream.h"
91 #include "llvm/Target/TargetMachine.h"
92 #include "llvm/Target/TargetOptions.h"
93 #include <algorithm>
94 #include <cassert>
95 #include <cstdint>
96 #include <iterator>
97 #include <list>
98 #include <utility>
99 #include <vector>
100 
101 using namespace llvm;
102 
103 #define DEBUG_TYPE "ppc-lowering"
104 
105 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
106 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
107 
108 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
109 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
110 
111 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
112 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
113 
114 static cl::opt<bool> DisableSCO("disable-ppc-sco",
115 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
116 
117 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
118 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
119 
120 static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
121 cl::desc("enable quad precision float support on ppc"), cl::Hidden);
122 
123 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
124 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
125 
// Pass statistics, reported under -stats. Incremented by the call-lowering
// code later in this file.
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

// Forward declaration; the definition appears later in this translation unit
// (it is file-static, so it must be defined in this file).
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

// Forward declaration of a file-local helper defined later in this file.
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
// Declared in another translation unit; consulted below when deciding whether
// to custom-lower i1 TRUNCATE as a workaround for the ANDI glue bug.
extern cl::opt<bool> ANDIGlueBug;
135 
136 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
137                                      const PPCSubtarget &STI)
138     : TargetLowering(TM), Subtarget(STI) {
139   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
140   // arguments are at least 4/8 bytes aligned.
141   bool isPPC64 = Subtarget.isPPC64();
142   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
143 
144   // Set up the register classes.
145   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
146   if (!useSoftFloat()) {
147     if (hasSPE()) {
148       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
149       addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
150     } else {
151       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
152       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
153     }
154   }
155 
156   // Match BITREVERSE to customized fast code sequence in the td file.
157   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
158   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
159 
160   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
161   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
162 
163   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
164   for (MVT VT : MVT::integer_valuetypes()) {
165     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
166     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
167   }
168 
169   if (Subtarget.isISA3_0()) {
170     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
171     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
172     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
173     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
174   } else {
175     // No extending loads from f16 or HW conversions back and forth.
176     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
177     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
178     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
179     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
180     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
181     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
182     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
183     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
184   }
185 
186   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
187 
188   // PowerPC has pre-inc load and store's.
189   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
190   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
191   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
192   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
193   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
194   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
195   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
196   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
197   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
198   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
199   if (!Subtarget.hasSPE()) {
200     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
201     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
202     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
203     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
204   }
205 
206   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
207   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
208   for (MVT VT : ScalarIntVTs) {
209     setOperationAction(ISD::ADDC, VT, Legal);
210     setOperationAction(ISD::ADDE, VT, Legal);
211     setOperationAction(ISD::SUBC, VT, Legal);
212     setOperationAction(ISD::SUBE, VT, Legal);
213   }
214 
215   if (Subtarget.useCRBits()) {
216     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
217 
218     if (isPPC64 || Subtarget.hasFPCVT()) {
219       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
220       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
221                          isPPC64 ? MVT::i64 : MVT::i32);
222       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
223       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
224                         isPPC64 ? MVT::i64 : MVT::i32);
225     } else {
226       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
227       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
228     }
229 
230     // PowerPC does not support direct load/store of condition registers.
231     setOperationAction(ISD::LOAD, MVT::i1, Custom);
232     setOperationAction(ISD::STORE, MVT::i1, Custom);
233 
234     // FIXME: Remove this once the ANDI glue bug is fixed:
235     if (ANDIGlueBug)
236       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
237 
238     for (MVT VT : MVT::integer_valuetypes()) {
239       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
240       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
241       setTruncStoreAction(VT, MVT::i1, Expand);
242     }
243 
244     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
245   }
246 
247   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
248   // PPC (the libcall is not available).
249   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
250   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
251 
252   // We do not currently implement these libm ops for PowerPC.
253   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
254   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
255   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
256   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
257   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
258   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
259 
260   // PowerPC has no SREM/UREM instructions unless we are on P9
261   // On P9 we may use a hardware instruction to compute the remainder.
262   // The instructions are not legalized directly because in the cases where the
263   // result of both the remainder and the division is required it is more
264   // efficient to compute the remainder from the result of the division rather
265   // than use the remainder instruction.
266   if (Subtarget.isISA3_0()) {
267     setOperationAction(ISD::SREM, MVT::i32, Custom);
268     setOperationAction(ISD::UREM, MVT::i32, Custom);
269     setOperationAction(ISD::SREM, MVT::i64, Custom);
270     setOperationAction(ISD::UREM, MVT::i64, Custom);
271   } else {
272     setOperationAction(ISD::SREM, MVT::i32, Expand);
273     setOperationAction(ISD::UREM, MVT::i32, Expand);
274     setOperationAction(ISD::SREM, MVT::i64, Expand);
275     setOperationAction(ISD::UREM, MVT::i64, Expand);
276   }
277 
278   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
279   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
280   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
281   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
282   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
283   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
284   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
285   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
286   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
287 
288   // Handle constrained floating-point operations of scalar.
289   // TODO: Handle SPE specific operation.
290   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
291   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
292   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
293   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
294   setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
295 
296   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
297   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
298   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
299   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
300   setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
301 
302   if (Subtarget.hasFSQRT()) {
303     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
304     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
305   }
306 
307   // We don't support sin/cos/sqrt/fmod/pow
308   setOperationAction(ISD::FSIN , MVT::f64, Expand);
309   setOperationAction(ISD::FCOS , MVT::f64, Expand);
310   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
311   setOperationAction(ISD::FREM , MVT::f64, Expand);
312   setOperationAction(ISD::FPOW , MVT::f64, Expand);
313   setOperationAction(ISD::FSIN , MVT::f32, Expand);
314   setOperationAction(ISD::FCOS , MVT::f32, Expand);
315   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
316   setOperationAction(ISD::FREM , MVT::f32, Expand);
317   setOperationAction(ISD::FPOW , MVT::f32, Expand);
318   if (Subtarget.hasSPE()) {
319     setOperationAction(ISD::FMA  , MVT::f64, Expand);
320     setOperationAction(ISD::FMA  , MVT::f32, Expand);
321   } else {
322     setOperationAction(ISD::FMA  , MVT::f64, Legal);
323     setOperationAction(ISD::FMA  , MVT::f32, Legal);
324   }
325 
326   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
327 
328   // If we're enabling GP optimizations, use hardware square root
329   if (!Subtarget.hasFSQRT() &&
330       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
331         Subtarget.hasFRE()))
332     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
333 
334   if (!Subtarget.hasFSQRT() &&
335       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
336         Subtarget.hasFRES()))
337     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
338 
339   if (Subtarget.hasFCPSGN()) {
340     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
341     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
342   } else {
343     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
344     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
345   }
346 
347   if (Subtarget.hasFPRND()) {
348     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
349     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
350     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
351     setOperationAction(ISD::FROUND, MVT::f64, Legal);
352 
353     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
354     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
355     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
356     setOperationAction(ISD::FROUND, MVT::f32, Legal);
357   }
358 
359   // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
360   // to speed up scalar BSWAP64.
361   // CTPOP or CTTZ were introduced in P8/P9 respectively
362   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
363   if (Subtarget.hasP9Vector())
364     setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
365   else
366     setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
367   if (Subtarget.isISA3_0()) {
368     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
369     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
370   } else {
371     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
372     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
373   }
374 
375   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
376     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
377     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
378   } else {
379     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
380     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
381   }
382 
383   // PowerPC does not have ROTR
384   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
385   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
386 
387   if (!Subtarget.useCRBits()) {
388     // PowerPC does not have Select
389     setOperationAction(ISD::SELECT, MVT::i32, Expand);
390     setOperationAction(ISD::SELECT, MVT::i64, Expand);
391     setOperationAction(ISD::SELECT, MVT::f32, Expand);
392     setOperationAction(ISD::SELECT, MVT::f64, Expand);
393   }
394 
395   // PowerPC wants to turn select_cc of FP into fsel when possible.
396   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
397   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
398 
399   // PowerPC wants to optimize integer setcc a bit
400   if (!Subtarget.useCRBits())
401     setOperationAction(ISD::SETCC, MVT::i32, Custom);
402 
403   // PowerPC does not have BRCOND which requires SetCC
404   if (!Subtarget.useCRBits())
405     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
406 
407   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
408 
409   if (Subtarget.hasSPE()) {
410     // SPE has built-in conversions
411     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
412     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
413     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
414   } else {
415     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
416     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
417 
418     // PowerPC does not have [U|S]INT_TO_FP
419     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
420     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
421   }
422 
423   if (Subtarget.hasDirectMove() && isPPC64) {
424     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
425     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
426     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
427     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
428     if (TM.Options.UnsafeFPMath) {
429       setOperationAction(ISD::LRINT, MVT::f64, Legal);
430       setOperationAction(ISD::LRINT, MVT::f32, Legal);
431       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
432       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
433       setOperationAction(ISD::LROUND, MVT::f64, Legal);
434       setOperationAction(ISD::LROUND, MVT::f32, Legal);
435       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
436       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
437     }
438   } else {
439     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
440     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
441     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
442     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
443   }
444 
445   // We cannot sextinreg(i1).  Expand to shifts.
446   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
447 
448   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
449   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
450   // support continuation, user-level threading, and etc.. As a result, no
451   // other SjLj exception interfaces are implemented and please don't build
452   // your own exception handling based on them.
453   // LLVM/Clang supports zero-cost DWARF exception handling.
454   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
455   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
456 
457   // We want to legalize GlobalAddress and ConstantPool nodes into the
458   // appropriate instructions to materialize the address.
459   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
460   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
461   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
462   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
463   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
464   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
465   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
466   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
467   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
468   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
469 
470   // TRAP is legal.
471   setOperationAction(ISD::TRAP, MVT::Other, Legal);
472 
473   // TRAMPOLINE is custom lowered.
474   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
475   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
476 
477   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
478   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
479 
480   if (Subtarget.is64BitELFABI()) {
481     // VAARG always uses double-word chunks, so promote anything smaller.
482     setOperationAction(ISD::VAARG, MVT::i1, Promote);
483     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
484     setOperationAction(ISD::VAARG, MVT::i8, Promote);
485     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
486     setOperationAction(ISD::VAARG, MVT::i16, Promote);
487     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
488     setOperationAction(ISD::VAARG, MVT::i32, Promote);
489     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
490     setOperationAction(ISD::VAARG, MVT::Other, Expand);
491   } else if (Subtarget.is32BitELFABI()) {
492     // VAARG is custom lowered with the 32-bit SVR4 ABI.
493     setOperationAction(ISD::VAARG, MVT::Other, Custom);
494     setOperationAction(ISD::VAARG, MVT::i64, Custom);
495   } else
496     setOperationAction(ISD::VAARG, MVT::Other, Expand);
497 
498   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
499   if (Subtarget.is32BitELFABI())
500     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
501   else
502     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
503 
504   // Use the default implementation.
505   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
506   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
507   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
508   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
509   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
510   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
511   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
512   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
513   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
514 
515   // We want to custom lower some of our intrinsics.
516   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
517 
518   // To handle counter-based loop conditions.
519   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
520 
521   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
522   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
523   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
524   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
525 
526   // Comparisons that require checking two conditions.
527   if (Subtarget.hasSPE()) {
528     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
529     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
530     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
531     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
532   }
533   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
534   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
535   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
536   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
537   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
538   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
539   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
540   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
541   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
542   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
543   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
544   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
545 
546   if (Subtarget.has64BitSupport()) {
547     // They also have instructions for converting between i64 and fp.
548     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
549     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
550     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
551     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
552     // This is just the low 32 bits of a (signed) fp->i64 conversion.
553     // We cannot do this with Promote because i64 is not a legal type.
554     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
555 
556     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
557       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
558   } else {
559     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
560     if (Subtarget.hasSPE())
561       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
562     else
563       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
564   }
565 
566   // With the instructions enabled under FPCVT, we can do everything.
567   if (Subtarget.hasFPCVT()) {
568     if (Subtarget.has64BitSupport()) {
569       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
570       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
571       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
572       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
573     }
574 
575     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
576     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
577     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
578     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
579   }
580 
581   if (Subtarget.use64BitRegs()) {
582     // 64-bit PowerPC implementations can support i64 types directly
583     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
584     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
585     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
586     // 64-bit PowerPC wants to expand i128 shifts itself.
587     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
588     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
589     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
590   } else {
591     // 32-bit PowerPC wants to expand i64 shifts itself.
592     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
593     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
594     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
595   }
596 
597   if (Subtarget.hasVSX()) {
598     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
599     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
600     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
601     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
602   }
603 
604   if (Subtarget.hasAltivec()) {
605     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
606       setOperationAction(ISD::SADDSAT, VT, Legal);
607       setOperationAction(ISD::SSUBSAT, VT, Legal);
608       setOperationAction(ISD::UADDSAT, VT, Legal);
609       setOperationAction(ISD::USUBSAT, VT, Legal);
610     }
611     // First set operation action for all vector types to expand. Then we
612     // will selectively turn on ones that can be effectively codegen'd.
613     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
614       // add/sub are legal for all supported vector VT's.
615       setOperationAction(ISD::ADD, VT, Legal);
616       setOperationAction(ISD::SUB, VT, Legal);
617 
618       // For v2i64, these are only valid with P8Vector. This is corrected after
619       // the loop.
620       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
621         setOperationAction(ISD::SMAX, VT, Legal);
622         setOperationAction(ISD::SMIN, VT, Legal);
623         setOperationAction(ISD::UMAX, VT, Legal);
624         setOperationAction(ISD::UMIN, VT, Legal);
625       }
626       else {
627         setOperationAction(ISD::SMAX, VT, Expand);
628         setOperationAction(ISD::SMIN, VT, Expand);
629         setOperationAction(ISD::UMAX, VT, Expand);
630         setOperationAction(ISD::UMIN, VT, Expand);
631       }
632 
633       if (Subtarget.hasVSX()) {
634         setOperationAction(ISD::FMAXNUM, VT, Legal);
635         setOperationAction(ISD::FMINNUM, VT, Legal);
636       }
637 
638       // Vector instructions introduced in P8
639       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
640         setOperationAction(ISD::CTPOP, VT, Legal);
641         setOperationAction(ISD::CTLZ, VT, Legal);
642       }
643       else {
644         setOperationAction(ISD::CTPOP, VT, Expand);
645         setOperationAction(ISD::CTLZ, VT, Expand);
646       }
647 
648       // Vector instructions introduced in P9
649       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
650         setOperationAction(ISD::CTTZ, VT, Legal);
651       else
652         setOperationAction(ISD::CTTZ, VT, Expand);
653 
654       // We promote all shuffles to v16i8.
655       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
656       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
657 
658       // We promote all non-typed operations to v4i32.
659       setOperationAction(ISD::AND   , VT, Promote);
660       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
661       setOperationAction(ISD::OR    , VT, Promote);
662       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
663       setOperationAction(ISD::XOR   , VT, Promote);
664       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
665       setOperationAction(ISD::LOAD  , VT, Promote);
666       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
667       setOperationAction(ISD::SELECT, VT, Promote);
668       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
669       setOperationAction(ISD::VSELECT, VT, Legal);
670       setOperationAction(ISD::SELECT_CC, VT, Promote);
671       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
672       setOperationAction(ISD::STORE, VT, Promote);
673       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
674 
675       // No other operations are legal.
676       setOperationAction(ISD::MUL , VT, Expand);
677       setOperationAction(ISD::SDIV, VT, Expand);
678       setOperationAction(ISD::SREM, VT, Expand);
679       setOperationAction(ISD::UDIV, VT, Expand);
680       setOperationAction(ISD::UREM, VT, Expand);
681       setOperationAction(ISD::FDIV, VT, Expand);
682       setOperationAction(ISD::FREM, VT, Expand);
683       setOperationAction(ISD::FNEG, VT, Expand);
684       setOperationAction(ISD::FSQRT, VT, Expand);
685       setOperationAction(ISD::FLOG, VT, Expand);
686       setOperationAction(ISD::FLOG10, VT, Expand);
687       setOperationAction(ISD::FLOG2, VT, Expand);
688       setOperationAction(ISD::FEXP, VT, Expand);
689       setOperationAction(ISD::FEXP2, VT, Expand);
690       setOperationAction(ISD::FSIN, VT, Expand);
691       setOperationAction(ISD::FCOS, VT, Expand);
692       setOperationAction(ISD::FABS, VT, Expand);
693       setOperationAction(ISD::FFLOOR, VT, Expand);
694       setOperationAction(ISD::FCEIL,  VT, Expand);
695       setOperationAction(ISD::FTRUNC, VT, Expand);
696       setOperationAction(ISD::FRINT,  VT, Expand);
697       setOperationAction(ISD::FNEARBYINT, VT, Expand);
698       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
699       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
700       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
701       setOperationAction(ISD::MULHU, VT, Expand);
702       setOperationAction(ISD::MULHS, VT, Expand);
703       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
704       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
705       setOperationAction(ISD::UDIVREM, VT, Expand);
706       setOperationAction(ISD::SDIVREM, VT, Expand);
707       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
708       setOperationAction(ISD::FPOW, VT, Expand);
709       setOperationAction(ISD::BSWAP, VT, Expand);
710       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
711       setOperationAction(ISD::ROTL, VT, Expand);
712       setOperationAction(ISD::ROTR, VT, Expand);
713 
714       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
715         setTruncStoreAction(VT, InnerVT, Expand);
716         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
717         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
718         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
719       }
720     }
721     if (!Subtarget.hasP8Vector()) {
722       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
723       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
724       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
725       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
726     }
727 
728     for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
729       setOperationAction(ISD::ABS, VT, Custom);
730 
731     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
732     // with merges, splats, etc.
733     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
734 
735     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
736     // are cheap, so handle them before they get expanded to scalar.
737     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
738     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
739     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
740     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
741     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
742 
743     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
744     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
745     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
746     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
747     setOperationAction(ISD::SELECT, MVT::v4i32,
748                        Subtarget.useCRBits() ? Legal : Expand);
749     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
750     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
751     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
752     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
753     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
754     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
755     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
756     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
757     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
758 
759     // Without hasP8Altivec set, v2i64 SMAX isn't available.
760     // But ABS custom lowering requires SMAX support.
761     if (!Subtarget.hasP8Altivec())
762       setOperationAction(ISD::ABS, MVT::v2i64, Expand);
763 
764     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
765     if (Subtarget.hasAltivec())
766       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
767         setOperationAction(ISD::ROTL, VT, Legal);
768     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
769     if (Subtarget.hasP8Altivec())
770       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
771 
772     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
773     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
774     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
775     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
776 
777     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
778     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
779 
780     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
781       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
782       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
783     }
784 
785     if (Subtarget.hasP8Altivec())
786       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
787     else
788       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
789 
790     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
791     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
792 
793     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
794     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
795 
796     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
797     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
798     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
799     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
800 
801     // Altivec does not contain unordered floating-point compare instructions
802     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
803     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
804     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
805     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
806 
807     if (Subtarget.hasVSX()) {
808       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
809       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
810       if (Subtarget.hasP8Vector()) {
811         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
812         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
813       }
814       if (Subtarget.hasDirectMove() && isPPC64) {
815         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
816         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
817         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
818         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
819         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
820         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
821         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
822         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
823       }
824       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
825 
826       // The nearbyint variants are not allowed to raise the inexact exception
827       // so we can only code-gen them with unsafe math.
828       if (TM.Options.UnsafeFPMath) {
829         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
830         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
831       }
832 
833       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
834       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
835       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
836       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
837       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
838       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
839       setOperationAction(ISD::FROUND, MVT::f64, Legal);
840       setOperationAction(ISD::FRINT, MVT::f64, Legal);
841 
842       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
843       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
844       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
845       setOperationAction(ISD::FROUND, MVT::f32, Legal);
846       setOperationAction(ISD::FRINT, MVT::f32, Legal);
847 
848       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
849       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
850 
851       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
852       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
853 
854       // Share the Altivec comparison restrictions.
855       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
856       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
857       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
858       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
859 
860       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
861       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
862 
863       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
864 
865       if (Subtarget.hasP8Vector())
866         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
867 
868       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
869 
870       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
871       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
872       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
873 
874       if (Subtarget.hasP8Altivec()) {
875         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
876         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
877         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
878 
        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth custom lowering these here.
883         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
884         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
885         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
886 
887         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
888       }
889       else {
890         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
891         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
892         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
893 
894         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
895 
896         // VSX v2i64 only supports non-arithmetic operations.
897         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
898         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
899       }
900 
901       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
902       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
903       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
904       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
905 
906       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
907 
908       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
909       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
910       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
911       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
912 
913       // Custom handling for partial vectors of integers converted to
914       // floating point. We already have optimal handling for v2i32 through
915       // the DAG combine, so those aren't necessary.
916       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
917       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
918       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
919       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
920       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
921       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
922       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
923       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
924 
925       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
926       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
927       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
928       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
929       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
930       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
931 
932       if (Subtarget.hasDirectMove())
933         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
934       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
935 
      // Handle constrained floating-point operations of vectors.
      // The predicate is `hasVSX` because Altivec instructions do not raise
      // floating-point exceptions, but VSX vector instructions do.
939       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
940       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
941       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
942       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
943       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
944       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
945       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
946       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
947 
948       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
949       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
950       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
951       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
952       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
953       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
954       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
955       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
956 
957       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
958     }
959 
960     if (Subtarget.hasP8Altivec()) {
961       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
962       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
963     }
964 
965     if (Subtarget.hasP9Vector()) {
966       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
967       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
968 
969       // 128 bit shifts can be accomplished via 3 instructions for SHL and
970       // SRL, but not for SRA because of the instructions available:
971       // VS{RL} and VS{RL}O.
972       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
973       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
974       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
975 
976       if (EnableQuadPrecision) {
977         addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
978         setOperationAction(ISD::FADD, MVT::f128, Legal);
979         setOperationAction(ISD::FSUB, MVT::f128, Legal);
980         setOperationAction(ISD::FDIV, MVT::f128, Legal);
981         setOperationAction(ISD::FMUL, MVT::f128, Legal);
982         setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
983         // No extending loads to f128 on PPC.
984         for (MVT FPT : MVT::fp_valuetypes())
985           setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
986         setOperationAction(ISD::FMA, MVT::f128, Legal);
987         setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
988         setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
989         setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
990         setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
991         setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
992         setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
993 
994         setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
995         setOperationAction(ISD::FRINT, MVT::f128, Legal);
996         setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
997         setOperationAction(ISD::FCEIL, MVT::f128, Legal);
998         setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
999         setOperationAction(ISD::FROUND, MVT::f128, Legal);
1000 
1001         setOperationAction(ISD::SELECT, MVT::f128, Expand);
1002         setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1003         setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1004         setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1005         setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1006         setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1007         // No implementation for these ops for PowerPC.
1008         setOperationAction(ISD::FSIN , MVT::f128, Expand);
1009         setOperationAction(ISD::FCOS , MVT::f128, Expand);
1010         setOperationAction(ISD::FPOW, MVT::f128, Expand);
1011         setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1012         setOperationAction(ISD::FREM, MVT::f128, Expand);
1013 
1014         // Handle constrained floating-point operations of fp128
1015         setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1016         setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1017         setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1018         setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1019         setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1020         setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1021       }
1022       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1023       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1024       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1025       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1026       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1027     }
1028 
1029     if (Subtarget.hasP9Altivec()) {
1030       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1031       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1032 
1033       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1034       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1035       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1036       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1037       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1038       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1039       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1040     }
1041   }
1042 
1043   if (Subtarget.hasQPX()) {
1044     setOperationAction(ISD::FADD, MVT::v4f64, Legal);
1045     setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
1046     setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
1047     setOperationAction(ISD::FREM, MVT::v4f64, Expand);
1048 
1049     setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
1050     setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
1051 
1052     setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
1053     setOperationAction(ISD::STORE , MVT::v4f64, Custom);
1054 
1055     setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
1056     setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
1057 
1058     if (!Subtarget.useCRBits())
1059       setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
1060     setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
1061 
1062     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
1063     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
1064     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
1065     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
1066     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
1067     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
1068     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
1069 
1070     setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
1071     setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
1072 
1073     setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
1074     setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
1075 
1076     setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
1077     setOperationAction(ISD::FABS , MVT::v4f64, Legal);
1078     setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
1079     setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
1080     setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
1081     setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
1082     setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
1083     setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
1084     setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
1085     setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
1086 
1087     setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
1088     setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
1089 
1090     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
1091     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
1092 
1093     addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
1094 
1095     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
1096     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
1097     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
1098     setOperationAction(ISD::FREM, MVT::v4f32, Expand);
1099 
1100     setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1101     setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
1102 
1103     setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
1104     setOperationAction(ISD::STORE , MVT::v4f32, Custom);
1105 
1106     if (!Subtarget.useCRBits())
1107       setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
1108     setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
1109 
1110     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
1111     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
1112     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
1113     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
1114     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
1115     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
1116     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
1117 
1118     setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
1119     setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
1120 
1121     setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
1122     setOperationAction(ISD::FABS , MVT::v4f32, Legal);
1123     setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
1124     setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
1125     setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
1126     setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
1127     setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
1128     setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
1129     setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
1130     setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
1131 
1132     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1133     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1134 
1135     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
1136     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
1137 
1138     addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
1139 
1140     setOperationAction(ISD::AND , MVT::v4i1, Legal);
1141     setOperationAction(ISD::OR , MVT::v4i1, Legal);
1142     setOperationAction(ISD::XOR , MVT::v4i1, Legal);
1143 
1144     if (!Subtarget.useCRBits())
1145       setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
1146     setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
1147 
1148     setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
1149     setOperationAction(ISD::STORE , MVT::v4i1, Custom);
1150 
1151     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
1152     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
1153     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
1154     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
1155     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
1156     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
1157     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
1158 
1159     setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
1160     setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
1161 
1162     addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
1163 
1164     setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1165     setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
1166     setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1167     setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
1168 
1169     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
1170     setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
1171     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
1172     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1173 
1174     setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
1175     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
1176 
1177     // These need to set FE_INEXACT, and so cannot be vectorized here.
1178     setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
1179     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
1180 
1181     if (TM.Options.UnsafeFPMath) {
1182       setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1183       setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1184 
1185       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
1186       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
1187     } else {
1188       setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
1189       setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
1190 
1191       setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
1192       setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
1193     }
1194 
1195     // TODO: Handle constrained floating-point operations of v4f64
1196   }
1197 
1198   if (Subtarget.has64BitSupport())
1199     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1200 
1201   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1202 
1203   if (!isPPC64) {
1204     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1205     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1206   }
1207 
1208   setBooleanContents(ZeroOrOneBooleanContent);
1209 
1210   if (Subtarget.hasAltivec()) {
1211     // Altivec instructions set fields to all zeros or all ones.
1212     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1213   }
1214 
1215   if (!isPPC64) {
1216     // These libcalls are not available in 32-bit.
1217     setLibcallName(RTLIB::SHL_I128, nullptr);
1218     setLibcallName(RTLIB::SRL_I128, nullptr);
1219     setLibcallName(RTLIB::SRA_I128, nullptr);
1220   }
1221 
1222   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1223 
1224   // We have target-specific dag combine patterns for the following nodes:
1225   setTargetDAGCombine(ISD::ADD);
1226   setTargetDAGCombine(ISD::SHL);
1227   setTargetDAGCombine(ISD::SRA);
1228   setTargetDAGCombine(ISD::SRL);
1229   setTargetDAGCombine(ISD::MUL);
1230   setTargetDAGCombine(ISD::SINT_TO_FP);
1231   setTargetDAGCombine(ISD::BUILD_VECTOR);
1232   if (Subtarget.hasFPCVT())
1233     setTargetDAGCombine(ISD::UINT_TO_FP);
1234   setTargetDAGCombine(ISD::LOAD);
1235   setTargetDAGCombine(ISD::STORE);
1236   setTargetDAGCombine(ISD::BR_CC);
1237   if (Subtarget.useCRBits())
1238     setTargetDAGCombine(ISD::BRCOND);
1239   setTargetDAGCombine(ISD::BSWAP);
1240   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1241   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1242   setTargetDAGCombine(ISD::INTRINSIC_VOID);
1243 
1244   setTargetDAGCombine(ISD::SIGN_EXTEND);
1245   setTargetDAGCombine(ISD::ZERO_EXTEND);
1246   setTargetDAGCombine(ISD::ANY_EXTEND);
1247 
1248   setTargetDAGCombine(ISD::TRUNCATE);
1249   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1250 
1251 
1252   if (Subtarget.useCRBits()) {
1253     setTargetDAGCombine(ISD::TRUNCATE);
1254     setTargetDAGCombine(ISD::SETCC);
1255     setTargetDAGCombine(ISD::SELECT_CC);
1256   }
1257 
1258   // Use reciprocal estimates.
1259   if (TM.Options.UnsafeFPMath) {
1260     setTargetDAGCombine(ISD::FDIV);
1261     setTargetDAGCombine(ISD::FSQRT);
1262   }
1263 
1264   if (Subtarget.hasP9Altivec()) {
1265     setTargetDAGCombine(ISD::ABS);
1266     setTargetDAGCombine(ISD::VSELECT);
1267   }
1268 
1269   if (EnableQuadPrecision) {
1270     setLibcallName(RTLIB::LOG_F128, "logf128");
1271     setLibcallName(RTLIB::LOG2_F128, "log2f128");
1272     setLibcallName(RTLIB::LOG10_F128, "log10f128");
1273     setLibcallName(RTLIB::EXP_F128, "expf128");
1274     setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1275     setLibcallName(RTLIB::SIN_F128, "sinf128");
1276     setLibcallName(RTLIB::COS_F128, "cosf128");
1277     setLibcallName(RTLIB::POW_F128, "powf128");
1278     setLibcallName(RTLIB::FMIN_F128, "fminf128");
1279     setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1280     setLibcallName(RTLIB::POWI_F128, "__powikf2");
1281     setLibcallName(RTLIB::REM_F128, "fmodf128");
1282   }
1283 
1284   // With 32 condition bits, we don't need to sink (and duplicate) compares
1285   // aggressively in CodeGenPrep.
1286   if (Subtarget.useCRBits()) {
1287     setHasMultipleConditionRegisters();
1288     setJumpIsExpensive();
1289   }
1290 
1291   setMinFunctionAlignment(Align(4));
1292 
1293   switch (Subtarget.getCPUDirective()) {
1294   default: break;
1295   case PPC::DIR_970:
1296   case PPC::DIR_A2:
1297   case PPC::DIR_E500:
1298   case PPC::DIR_E500mc:
1299   case PPC::DIR_E5500:
1300   case PPC::DIR_PWR4:
1301   case PPC::DIR_PWR5:
1302   case PPC::DIR_PWR5X:
1303   case PPC::DIR_PWR6:
1304   case PPC::DIR_PWR6X:
1305   case PPC::DIR_PWR7:
1306   case PPC::DIR_PWR8:
1307   case PPC::DIR_PWR9:
1308   case PPC::DIR_PWR_FUTURE:
1309     setPrefLoopAlignment(Align(16));
1310     setPrefFunctionAlignment(Align(16));
1311     break;
1312   }
1313 
1314   if (Subtarget.enableMachineScheduler())
1315     setSchedulingPreference(Sched::Source);
1316   else
1317     setSchedulingPreference(Sched::Hybrid);
1318 
1319   computeRegisterProperties(STI.getRegisterInfo());
1320 
1321   // The Freescale cores do better with aggressive inlining of memcpy and
1322   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1323   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1324       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1325     MaxStoresPerMemset = 32;
1326     MaxStoresPerMemsetOptSize = 16;
1327     MaxStoresPerMemcpy = 32;
1328     MaxStoresPerMemcpyOptSize = 8;
1329     MaxStoresPerMemmove = 32;
1330     MaxStoresPerMemmoveOptSize = 8;
1331   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
1335     MaxStoresPerMemset = 128;
1336     MaxStoresPerMemcpy = 128;
1337     MaxStoresPerMemmove = 128;
1338     MaxLoadsPerMemcmp = 128;
1339   } else {
1340     MaxLoadsPerMemcmp = 8;
1341     MaxLoadsPerMemcmpOptSize = 4;
1342   }
1343 
1344   // Let the subtarget (CPU) decide if a predictable select is more expensive
1345   // than the corresponding branch. This information is used in CGP to decide
1346   // when to convert selects into branches.
1347   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1348 }
1349 
1350 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1351 /// the desired ByVal argument alignment.
1352 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1353                              unsigned MaxMaxAlign) {
1354   if (MaxAlign == MaxMaxAlign)
1355     return;
1356   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1357     if (MaxMaxAlign >= 32 &&
1358         VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1359       MaxAlign = 32;
1360     else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1361              MaxAlign < 16)
1362       MaxAlign = 16;
1363   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1364     unsigned EltAlign = 0;
1365     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1366     if (EltAlign > MaxAlign)
1367       MaxAlign = EltAlign;
1368   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1369     for (auto *EltTy : STy->elements()) {
1370       unsigned EltAlign = 0;
1371       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1372       if (EltAlign > MaxAlign)
1373         MaxAlign = EltAlign;
1374       if (MaxAlign == MaxMaxAlign)
1375         break;
1376     }
1377   }
1378 }
1379 
1380 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1381 /// function arguments in the caller parameter area.
1382 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1383                                                   const DataLayout &DL) const {
1384   // 16byte and wider vectors are passed on 16byte boundary.
1385   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1386   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1387   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1388     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1389   return Align;
1390 }
1391 
/// Forward to the subtarget: soft-float code generation is a per-subtarget
/// feature flag.
bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}
1395 
/// Forward to the subtarget: availability of the Signal Processing Engine
/// (SPE) feature.
bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}
1399 
/// Prefer the (add (add x, 1), y) increment form over the equivalent
/// (sub y, (xor x, -1)) form only for scalar integer types; vector types
/// keep the sub-of-not form.
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}
1403 
1404 /// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific
1405 /// type is cheaper than a multiply followed by a shift.
1406 /// This is true for words and doublewords on 64-bit PowerPC.
1407 bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const {
1408   if (Subtarget.isPPC64() && (isOperationLegal(ISD::MULHS, Type) ||
1409                               isOperationLegal(ISD::MULHU, Type)))
1410     return true;
1411   return TargetLowering::isMulhCheaperThanMulShift(Type);
1412 }
1413 
1414 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1415   switch ((PPCISD::NodeType)Opcode) {
1416   case PPCISD::FIRST_NUMBER:    break;
1417   case PPCISD::FSEL:            return "PPCISD::FSEL";
1418   case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
1419   case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
1420   case PPCISD::FCFID:           return "PPCISD::FCFID";
1421   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1422   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1423   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1424   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1425   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1426   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1427   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1428   case PPCISD::FP_TO_UINT_IN_VSR:
1429                                 return "PPCISD::FP_TO_UINT_IN_VSR,";
1430   case PPCISD::FP_TO_SINT_IN_VSR:
1431                                 return "PPCISD::FP_TO_SINT_IN_VSR";
1432   case PPCISD::FRE:             return "PPCISD::FRE";
1433   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1434   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1435   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
1436   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
1437   case PPCISD::VPERM:           return "PPCISD::VPERM";
1438   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1439   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1440   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1441   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1442   case PPCISD::CMPB:            return "PPCISD::CMPB";
1443   case PPCISD::Hi:              return "PPCISD::Hi";
1444   case PPCISD::Lo:              return "PPCISD::Lo";
1445   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1446   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1447   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1448   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1449   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1450   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1451   case PPCISD::SRL:             return "PPCISD::SRL";
1452   case PPCISD::SRA:             return "PPCISD::SRA";
1453   case PPCISD::SHL:             return "PPCISD::SHL";
1454   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1455   case PPCISD::CALL:            return "PPCISD::CALL";
1456   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1457   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1458   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1459   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1460   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1461   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1462   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1463   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1464   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1465   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1466   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1467   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1468   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1469   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1470   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1471   case PPCISD::ANDI_rec_1_EQ_BIT:
1472     return "PPCISD::ANDI_rec_1_EQ_BIT";
1473   case PPCISD::ANDI_rec_1_GT_BIT:
1474     return "PPCISD::ANDI_rec_1_GT_BIT";
1475   case PPCISD::VCMP:            return "PPCISD::VCMP";
1476   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
1477   case PPCISD::LBRX:            return "PPCISD::LBRX";
1478   case PPCISD::STBRX:           return "PPCISD::STBRX";
1479   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1480   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1481   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1482   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1483   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1484   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1485   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1486   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1487   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1488   case PPCISD::ST_VSR_SCAL_INT:
1489                                 return "PPCISD::ST_VSR_SCAL_INT";
1490   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1491   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1492   case PPCISD::BDZ:             return "PPCISD::BDZ";
1493   case PPCISD::MFFS:            return "PPCISD::MFFS";
1494   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1495   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1496   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1497   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1498   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1499   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1500   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1501   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1502   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1503   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1504   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1505   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1506   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1507   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1508   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1509   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1510   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1511   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1512   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1513   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1514   case PPCISD::SC:              return "PPCISD::SC";
1515   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1516   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1517   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1518   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1519   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1520   case PPCISD::VABSD:           return "PPCISD::VABSD";
1521   case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
1522   case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
1523   case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
1524   case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
1525   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
1526   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
1527   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1528   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1529   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1530   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1531   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1532   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1533   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1534   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1535   }
1536   return nullptr;
1537 }
1538 
1539 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1540                                           EVT VT) const {
1541   if (!VT.isVector())
1542     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1543 
1544   if (Subtarget.hasQPX())
1545     return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1546 
1547   return VT.changeVectorElementTypeToInteger();
1548 }
1549 
/// Allow aggressive formation of fused multiply-add nodes for every
/// floating-point type; no PPC configuration opts out here.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
1554 
1555 //===----------------------------------------------------------------------===//
1556 // Node matching predicates, for use by the tblgen matching code.
1557 //===----------------------------------------------------------------------===//
1558 
1559 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1560 static bool isFloatingPointZero(SDValue Op) {
1561   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1562     return CFP->getValueAPF().isZero();
1563   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1564     // Maybe this has already been legalized into the constant pool?
1565     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1566       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1567         return CFP->getValueAPF().isZero();
1568   }
1569   return false;
1570 }
1571 
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef (shuffle-mask undefs are encoded as negative values)
/// or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  if (Op < 0)
    return true;
  return Op == Val;
}
1577 
1578 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1579 /// VPKUHUM instruction.
1580 /// The ShuffleKind distinguishes between big-endian operations with
1581 /// two different inputs (0), either-endian operations with two identical
1582 /// inputs (1), and little-endian operations with two different inputs (2).
1583 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    // Big-endian, two inputs: the result must take every odd-numbered byte
    // of the 32-byte concatenated input.
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    // Little-endian, two (swapped) inputs: every even-numbered byte.
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary: both result halves must select the same bytes of the single
    // input (even bytes on LE, odd bytes on BE).
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
        return false;
  }
  // Note: ShuffleKind values other than 0-2 fall through and return true.
  return true;
}
1608 
1609 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1610 /// VPKUWUM instruction.
1611 /// The ShuffleKind distinguishes between big-endian operations with
1612 /// two different inputs (0), either-endian operations with two identical
1613 /// inputs (1), and little-endian operations with two different inputs (2).
1614 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    // Big-endian, two inputs: each result halfword must be bytes 2..3 of the
    // corresponding input word (its low halfword in BE numbering).
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    // Little-endian, two (swapped) inputs: bytes 0..1 of each input word.
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary: both result halves must pick the same halfword of each word of
    // the single input (byte offset 0 on LE, 2 on BE).
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
        return false;
  }
  // Note: ShuffleKind values other than 0-2 fall through and return true.
  return true;
}
1643 
1644 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1645 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1646 /// current subtarget.
1647 ///
1648 /// The ShuffleKind distinguishes between big-endian operations with
1649 /// two different inputs (0), either-endian operations with two identical
1650 /// inputs (1), and little-endian operations with two different inputs (2).
1651 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  // vpkudum is only available with Power8 vector support.
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    // Big-endian, two inputs: each result word must be bytes 4..7 of the
    // corresponding input doubleword (its low word in BE numbering).
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    // Little-endian, two (swapped) inputs: bytes 0..3 of each doubleword.
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary: both result halves must pick the same word of each doubleword
    // of the single input (byte offset 0 on LE, 4 on BE).  The loop runs
    // i = 0 and 4, which together cover all 16 result bytes.
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  // Note: ShuffleKind values other than 0-2 fall through and return true.
  return true;
}
1693 
1694 /// isVMerge - Common function, used to match vmrg* shuffles.
1695 ///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  // A merge interleaves 8/UnitSize units taken alternately from the two
  // inputs: result = LHS[LHSStart..], RHS[RHSStart..], LHS[LHSStart +
  // UnitSize..], RHS[RHSStart + UnitSize..], and so on.
  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
1713 
1714 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1715 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1716 /// The ShuffleKind distinguishes between big-endian merges with two
1717 /// different inputs (0), either-endian merges with two identical inputs (1),
1718 /// and little-endian merges with two different inputs (2).  For the latter,
1719 /// the input operands are swapped (see PPCInstrAltivec.td).
1720 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1721                              unsigned ShuffleKind, SelectionDAG &DAG) {
1722   if (DAG.getDataLayout().isLittleEndian()) {
1723     if (ShuffleKind == 1) // unary
1724       return isVMerge(N, UnitSize, 0, 0);
1725     else if (ShuffleKind == 2) // swapped
1726       return isVMerge(N, UnitSize, 0, 16);
1727     else
1728       return false;
1729   } else {
1730     if (ShuffleKind == 1) // unary
1731       return isVMerge(N, UnitSize, 8, 8);
1732     else if (ShuffleKind == 0) // normal
1733       return isVMerge(N, UnitSize, 8, 24);
1734     else
1735       return false;
1736   }
1737 }
1738 
1739 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1740 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1741 /// The ShuffleKind distinguishes between big-endian merges with two
1742 /// different inputs (0), either-endian merges with two identical inputs (1),
1743 /// and little-endian merges with two different inputs (2).  For the latter,
1744 /// the input operands are swapped (see PPCInstrAltivec.td).
1745 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1746                              unsigned ShuffleKind, SelectionDAG &DAG) {
1747   if (DAG.getDataLayout().isLittleEndian()) {
1748     if (ShuffleKind == 1) // unary
1749       return isVMerge(N, UnitSize, 8, 8);
1750     else if (ShuffleKind == 2) // swapped
1751       return isVMerge(N, UnitSize, 8, 24);
1752     else
1753       return false;
1754   } else {
1755     if (ShuffleKind == 1) // unary
1756       return isVMerge(N, UnitSize, 0, 0);
1757     else if (ShuffleKind == 0) // normal
1758       return isVMerge(N, UnitSize, 0, 16);
1759     else
1760       return false;
1761   }
1762 }
1763 
1764 /**
1765  * Common function used to match vmrgew and vmrgow shuffles
1766  *
1767  * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
1769  * machine.
1770  *   - Little Endian:
1771  *     - Use offset of 0 to check for odd elements
1772  *     - Use offset of 4 to check for even elements
1773  *   - Big Endian:
1774  *     - Use offset of 0 to check for even elements
1775  *     - Use offset of 4 to check for odd elements
1776  * A detailed description of the vector element ordering for little endian and
1777  * big endian can be found at
1778  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1779  * Targeting your applications - what little endian and big endian IBM XL C/C++
1780  * compiler differences mean to you
1781  *
1782  * The mask to the shuffle vector instruction specifies the indices of the
1783  * elements from the two input vectors to place in the result. The elements are
1784  * numbered in array-access order, starting with the first vector. These vectors
1785  * are always of type v16i8, thus each vector will contain 16 elements of size
1786  * 8. More info on the shuffle vector can be found in the
1787  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1788  * Language Reference.
1789  *
1790  * The RHSStartValue indicates whether the same input vectors are used (unary)
1791  * or two different input vectors are used, based on the following:
1792  *   - If the instruction uses the same vector for both inputs, the range of the
1793  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1794  *     be 0.
1795  *   - If the instruction has two different vectors then the range of the
1796  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1797  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1798  *     to 31 specify elements in the second vector).
1799  *
1800  * \param[in] N The shuffle vector SD Node to analyze
1801  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1802  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1803  * vector to the shuffle_vector instruction
1804  * \return true iff this shuffle vector represents an even or odd word merge
1805  */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  // i selects the contributing input (i == 0 checks result bytes 0-3 and
  // 8-11 against the first vector; i == 1 checks bytes 4-7 and 12-15 against
  // the vector starting at RHSStartValue), and j walks the 4 bytes of a
  // word.  IndexOffset selects the even or odd words of each input.
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}
1820 
1821 /**
1822  * Determine if the specified shuffle mask is suitable for the vmrgew or
1823  * vmrgow instructions.
1824  *
1825  * \param[in] N The shuffle vector SD Node to analyze
1826  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1827  * \param[in] ShuffleKind Identify the type of merge:
1828  *   - 0 = big-endian merge with two different inputs;
1829  *   - 1 = either-endian merge with two identical inputs;
1830  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1831  *     little-endian merges).
1832  * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
1834  */
1835 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1836                               unsigned ShuffleKind, SelectionDAG &DAG) {
1837   if (DAG.getDataLayout().isLittleEndian()) {
1838     unsigned indexOffset = CheckEven ? 4 : 0;
1839     if (ShuffleKind == 1) // Unary
1840       return isVMerge(N, indexOffset, 0);
1841     else if (ShuffleKind == 2) // swapped
1842       return isVMerge(N, indexOffset, 16);
1843     else
1844       return false;
1845   }
1846   else {
1847     unsigned indexOffset = CheckEven ? 0 : 4;
1848     if (ShuffleKind == 1) // Unary
1849       return isVMerge(N, indexOffset, 0);
1850     else if (ShuffleKind == 0) // Normal
1851       return isVMerge(N, indexOffset, 16);
1852     else
1853       return false;
1854   }
1855   return false;
1856 }
1857 
1858 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1859 /// amount, otherwise return -1.
1860 /// The ShuffleKind distinguishes between big-endian operations with two
1861 /// different inputs (0), either-endian operations with two identical inputs
1862 /// (1), and little-endian operations with two different inputs (2).  For the
1863 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  // Normalize ShiftAmt to be the source index of result byte 0.
  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    // A unary shuffle reads a single 16-byte input, so indices wrap mod 16.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // On little-endian the operands are swapped, so the shift count mirrors.
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
1904 
1905 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1906 /// specifies a splat of a single element that is suitable for input to
1907 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  // (An undef leading element (-1) also lands here after the unsigned
  // conversion, so it is rejected as well.)
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  // Every subsequent element must repeat the first one byte-for-byte
  // (fully-undef elements are allowed and skipped).
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
1939 
1940 /// Check that the mask is shuffling N byte elements. Within each N byte
1941 /// element of the mask, the indices could be either in increasing or
1942 /// decreasing order as long as they are consecutive.
1943 /// \param[in] N the shuffle vector SD Node to analyze
1944 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1945 /// Word/DoubleWord/QuadWord).
1946 /// \param[in] StepLen the delta indices number among the N byte element, if
1947 /// the mask is in increasing/decreasing order then it is 1/-1.
1948 /// \return true iff the mask is shuffling N byte elements.
1949 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1950                                    int StepLen) {
1951   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1952          "Unexpected element width.");
1953   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1954 
1955   unsigned NumOfElem = 16 / Width;
1956   unsigned MaskVal[16]; //  Width is never greater than 16
1957   for (unsigned i = 0; i < NumOfElem; ++i) {
1958     MaskVal[0] = N->getMaskElt(i * Width);
1959     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1960       return false;
1961     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1962       return false;
1963     }
1964 
1965     for (unsigned int j = 1; j < Width; ++j) {
1966       MaskVal[j] = N->getMaskElt(i * Width + j);
1967       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1968         return false;
1969       }
1970     }
1971   }
1972 
1973   return true;
1974 }
1975 
/// Return true if this shuffle mask amounts to inserting one aligned word of
/// one (possibly rotated) input into the other input.  On success:
///  - ShiftElts is the word rotation that brings the source word into place;
///  - InsertAtByte is the byte offset of the inserted word in the result;
///  - Swap indicates the mask references the inputs in the opposite order
///    from how the resulting instruction consumes them.
/// NOTE(review): presumably lowered via xxsldwi + xxinsertw — confirm
/// against the callers of this predicate.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // Each case requires exactly one "foreign" word; the other three words
  // must be the identity permutation of the remaining input.
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
2050 
2051 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2052                                bool &Swap, bool IsLE) {
2053   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2054   // Ensure each byte index of the word is consecutive.
2055   if (!isNByteElemShuffleMask(N, 4, 1))
2056     return false;
2057 
2058   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2059   unsigned M0 = N->getMaskElt(0) / 4;
2060   unsigned M1 = N->getMaskElt(4) / 4;
2061   unsigned M2 = N->getMaskElt(8) / 4;
2062   unsigned M3 = N->getMaskElt(12) / 4;
2063 
2064   // If both vector operands for the shuffle are the same vector, the mask will
2065   // contain only elements from the first one and the second one will be undef.
2066   if (N->getOperand(1).isUndef()) {
2067     assert(M0 < 4 && "Indexing into an undef vector?");
2068     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2069       return false;
2070 
2071     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2072     Swap = false;
2073     return true;
2074   }
2075 
2076   // Ensure each word index of the ShuffleVector Mask is consecutive.
2077   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2078     return false;
2079 
2080   if (IsLE) {
2081     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2082       // Input vectors don't need to be swapped if the leading element
2083       // of the result is one of the 3 left elements of the second vector
2084       // (or if there is no shift to be done at all).
2085       Swap = false;
2086       ShiftElts = (8 - M0) % 8;
2087     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2088       // Input vectors need to be swapped if the leading element
2089       // of the result is one of the 3 left elements of the first vector
2090       // (or if we're shifting by 4 - thereby simply swapping the vectors).
2091       Swap = true;
2092       ShiftElts = (4 - M0) % 4;
2093     }
2094 
2095     return true;
2096   } else {                                          // BE
2097     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2098       // Input vectors don't need to be swapped if the leading element
2099       // of the result is one of the 4 elements of the first vector.
2100       Swap = false;
2101       ShiftElts = M0;
2102     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2103       // Input vectors need to be swapped if the leading element
2104       // of the result is one of the 4 elements of the right vector.
2105       Swap = true;
2106       ShiftElts = M0 - 4;
2107     }
2108 
2109     return true;
2110   }
2111 }
2112 
2113 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2114   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2115 
2116   if (!isNByteElemShuffleMask(N, Width, -1))
2117     return false;
2118 
2119   for (int i = 0; i < 16; i += Width)
2120     if (N->getMaskElt(i) != i + Width - 1)
2121       return false;
2122 
2123   return true;
2124 }
2125 
/// Mask check for a byte-reversal of each halfword (XXBRH, 2-byte chunks).
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}
2129 
/// Mask check for a byte-reversal of each word (XXBRW, 4-byte chunks).
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}
2133 
/// Mask check for a byte-reversal of each doubleword (XXBRD, 8-byte chunks).
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}
2137 
/// Mask check for a byte-reversal of the full quadword (XXBRQ, 16 bytes).
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}
2141 
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  // Doubleword indices selected by the shuffle (mask bytes 0 and 8). For a
  // two-operand v16i8 shuffle these are in [0, 3]: 0-1 address the first
  // input, 2-3 the second.
  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      // The DM immediate counts doublewords from the most significant end,
      // hence the bit inversions in the little-endian form.
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      // Element 0 of the result comes from the second input and element 1
      // from the first: operands already in the order XXPERMDI expects.
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      // Rewrite the doubleword indices as if the two inputs were swapped,
      // then record that the caller must swap the operands.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      // Element 0 from the first input, element 1 from the second: operands
      // already in the order XXPERMDI expects on big endian.
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      // Rewrite the doubleword indices as if the two inputs were swapped,
      // then record that the caller must swap the operands.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
2201 
2202 
2203 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2204 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2205 /// elements are counted from the left of the vector register).
2206 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2207                                          SelectionDAG &DAG) {
2208   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2209   assert(isSplatShuffleMask(SVOp, EltSize));
2210   if (DAG.getDataLayout().isLittleEndian())
2211     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2212   else
2213     return SVOp->getMaskElt(0) / EltSize;
2214 }
2215 
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      // i & (Multiple-1) is the position inside a chunk (Multiple is a power
      // of two). All chunks must agree at every position; undefs match any
      // value.
      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      // Leading zeros + undef tail: splat of 0 works.
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      // Small non-negative value: zeros are the sign extension of it.
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      // Leading ones + undef tail: splat of -1 works.
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      // Small negative value: ones are the sign extension of it.
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  // Extract the splatted bits from the (integer or f32) constant.
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
2319 
2320 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2321 /// amount, otherwise return -1.
2322 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2323   EVT VT = N->getValueType(0);
2324   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2325     return -1;
2326 
2327   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2328 
2329   // Find the first non-undef value in the shuffle mask.
2330   unsigned i;
2331   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2332     /*search*/;
2333 
2334   if (i == 4) return -1;  // all undef.
2335 
2336   // Otherwise, check to see if the rest of the elements are consecutively
2337   // numbered from this value.
2338   unsigned ShiftAmt = SVOp->getMaskElt(i);
2339   if (ShiftAmt < i) return -1;
2340   ShiftAmt -= i;
2341 
2342   // Check the rest of the elements to see if they are consecutive.
2343   for (++i; i != 4; ++i)
2344     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2345       return -1;
2346 
2347   return ShiftAmt;
2348 }
2349 
2350 //===----------------------------------------------------------------------===//
2351 //  Addressing Mode Selection
2352 //===----------------------------------------------------------------------===//
2353 
2354 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2355 /// or 64-bit immediate, and if the value can be accurately represented as a
2356 /// sign extension from a 16-bit value.  If so, this returns true and the
2357 /// immediate.
2358 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2359   if (!isa<ConstantSDNode>(N))
2360     return false;
2361 
2362   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2363   if (N->getValueType(0) == MVT::i32)
2364     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2365   else
2366     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2367 }
/// Convenience overload of isIntS16Immediate that unwraps an SDValue.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
2371 
2372 
2373 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2374 /// be represented as an indexed [r+r] operation.
2375 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2376                                                SDValue &Index,
2377                                                SelectionDAG &DAG) const {
2378   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2379       UI != E; ++UI) {
2380     if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2381       if (Memop->getMemoryVT() == MVT::f64) {
2382           Base = N.getOperand(0);
2383           Index = N.getOperand(1);
2384           return true;
2385       }
2386     }
2387   }
2388   return false;
2389 }
2390 
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
/// non-zero and N can be represented by a base register plus a signed 16-bit
/// displacement, make a more precise judgement by checking (displacement % \p
/// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index, SelectionDAG &DAG,
                                            unsigned EncodingAlignment) const {
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
    // SPE load/store can only handle 8-bit offsets.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
        return true;
    // Prefer [r+imm] when the added constant fits in a signed 16-bit field
    // (and satisfies the requested encoding alignment, if any).
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || !(imm % EncodingAlignment)))
      return false; // r+i
    // An add of a PPCISD::Lo low-part is likewise matched as [r+imm].
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    // As above: a small immediate is better folded as [r+i].
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || !(imm % EncodingAlignment)))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
2444 
2445 // If we happen to be doing an i64 load or store into a stack slot that has
2446 // less than a 4-byte alignment, then the frame-index elimination may need to
2447 // use an indexed load or store instruction (because the offset may not be a
2448 // multiple of 4). The extra register needed to hold the offset comes from the
2449 // register scavenger, and it is possible that the scavenger will need to use
2450 // an emergency spill slot. As a result, we need to make sure that a spill slot
2451 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2452 // stack slot.
2453 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2454   // FIXME: This does not handle the LWA case.
2455   if (VT != MVT::i64)
2456     return;
2457 
2458   // NOTE: We'll exclude negative FIs here, which come from argument
2459   // lowering, because there are no known test cases triggering this problem
2460   // using packed structures (or similar). We can remove this exclusion if
2461   // we find such a test case. The reason why this is so test-case driven is
2462   // because this entire 'fixup' is only to prevent crashes (from the
2463   // register scavenger) on not-really-valid inputs. For example, if we have:
2464   //   %a = alloca i1
2465   //   %b = bitcast i1* %a to i64*
2466   //   store i64* a, i64 b
2467   // then the store should really be marked as 'align 1', but is not. If it
2468   // were marked as 'align 1' then the indexed form would have been
2469   // instruction-selected initially, and the problem this 'fixup' is preventing
2470   // won't happen regardless.
2471   if (FrameIdx < 0)
2472     return;
2473 
2474   MachineFunction &MF = DAG.getMachineFunction();
2475   MachineFrameInfo &MFI = MF.getFrameInfo();
2476 
2477   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2478     return;
2479 
2480   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2481   FuncInfo->setHasNonRISpills();
2482 }
2483 
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            unsigned EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);

  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    // ADD of a signed 16-bit constant (satisfying the encoding alignment):
    // the constant becomes the displacement.
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        // An i64 access into an under-aligned stack slot may need the
        // register scavenger; see fixupFuncForFI.
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) &&
        (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      // R0/X0 reads as zero when used as the base in this addressing mode.
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      // High half, adjusted for the sign-extension of the low 16 bits.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: the whole address is the base with a zero displacement.
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}
2590 
2591 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2592 /// represented as an indexed [r+r] operation.
2593 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2594                                                 SDValue &Index,
2595                                                 SelectionDAG &DAG) const {
2596   // Check to see if we can easily represent this as an [r+r] address.  This
2597   // will fail if it thinks that the address is more profitably represented as
2598   // reg+imm, e.g. where imm = 0.
2599   if (SelectAddressRegReg(N, Base, Index, DAG))
2600     return true;
2601 
2602   // If the address is the result of an add, we will utilize the fact that the
2603   // address calculation includes an implicit add.  However, we can reduce
2604   // register pressure if we do not materialize a constant just for use as the
2605   // index register.  We only get rid of the add if it is not an add of a
2606   // value and a 16-bit signed constant and both have a single use.
2607   int16_t imm = 0;
2608   if (N.getOpcode() == ISD::ADD &&
2609       (!isIntS16Immediate(N.getOperand(1), imm) ||
2610        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2611     Base = N.getOperand(0);
2612     Index = N.getOperand(1);
2613     return true;
2614   }
2615 
2616   // Otherwise, do it the hard way, using R0 as the base register.
2617   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2618                          N.getValueType());
2619   Index = N;
2620   return true;
2621 }
2622 
2623 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2624   Ty *PCRelCand = dyn_cast<Ty>(N);
2625   return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2626 }
2627 
2628 /// Returns true if this address is a PC Relative address.
2629 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2630 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2631 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2632   // This is a materialize PC Relative node. Always select this as PC Relative.
2633   Base = N;
2634   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2635     return true;
2636   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2637       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2638       isValidPCRelNode<JumpTableSDNode>(N) ||
2639       isValidPCRelNode<BlockAddressSDNode>(N))
2640     return true;
2641   return false;
2642 }
2643 
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;

  EVT MemVT = LD->getMemoryVT();
  if (!MemVT.isSimple())
    return false;
  // Only integer memory types the subtarget can load directly into a vector
  // register qualify: i64 always, i32 with P8 vector support, i16/i8 with P9
  // vector support.
  switch(MemVT.getSimpleVT().SimpleTy) {
  case MVT::i64:
    break;
  case MVT::i32:
    if (!ST.hasP8Vector())
      return false;
    break;
  case MVT::i16:
  case MVT::i8:
    if (!ST.hasP9Vector())
      return false;
    break;
  default:
    return false;
  }

  // The loaded value (result 0) must have exactly one use ...
  SDValue LoadedVal(N, 0);
  if (!LoadedVal.hasOneUse())
    return false;

  // ... and every use of result 0 must be a SCALAR_TO_VECTOR. Uses of other
  // results (the chain) are ignored via the ResNo check.
  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
       UI != UE; ++UI)
    if (UI.getUse().get().getResNo() == 0 &&
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
      return false;

  return true;
}
2686 
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  // Pull the address, memory type and alignment out of the load or store.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors (except
  // for QPX, which does have preinc r+r forms).
  if (VT.isVector()) {
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
      return false;
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
      AM = ISD::PRE_INC;
      return true;
    }
  }

  // Prefer the [r+r] pre-inc form when the address selects as reg+reg.
  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    // Require a displacement that is a multiple of 4 (EncodingAlignment = 4).
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
2777 
2778 //===----------------------------------------------------------------------===//
2779 //  LowerOperation implementation
2780 //===----------------------------------------------------------------------===//
2781 
2782 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2783 /// and LoOpFlags to the target MO flags.
2784 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2785                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2786                                const GlobalValue *GV = nullptr) {
2787   HiOpFlags = PPCII::MO_HA;
2788   LoOpFlags = PPCII::MO_LO;
2789 
2790   // Don't use the pic base if not in PIC relocation model.
2791   if (IsPIC) {
2792     HiOpFlags |= PPCII::MO_PIC_FLAG;
2793     LoOpFlags |= PPCII::MO_PIC_FLAG;
2794   }
2795 }
2796 
2797 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2798                              SelectionDAG &DAG) {
2799   SDLoc DL(HiPart);
2800   EVT PtrVT = HiPart.getValueType();
2801   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2802 
2803   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2804   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2805 
2806   // With PIC, the first instruction is actually "GR+hi(&G)".
2807   if (isPIC)
2808     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2809                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2810 
2811   // Generate non-pic code that has direct accesses to the constant pool.
2812   // The address of the global is just (hi(&g)+lo(&g)).
2813   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2814 }
2815 
2816 static void setUsesTOCBasePtr(MachineFunction &MF) {
2817   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2818   FuncInfo->setUsesTOCBasePtr();
2819 }
2820 
2821 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2822   setUsesTOCBasePtr(DAG.getMachineFunction());
2823 }
2824 
2825 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2826                                        SDValue GA) const {
2827   const bool Is64Bit = Subtarget.isPPC64();
2828   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2829   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2830                         : Subtarget.isAIXABI()
2831                               ? DAG.getRegister(PPC::R2, VT)
2832                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2833   SDValue Ops[] = { GA, Reg };
2834   return DAG.getMemIntrinsicNode(
2835       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2836       MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2837       MachineMemOperand::MOLoad);
2838 }
2839 
2840 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2841                                              SelectionDAG &DAG) const {
2842   EVT PtrVT = Op.getValueType();
2843   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2844   const Constant *C = CP->getConstVal();
2845 
2846   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2847   // The actual address of the GlobalValue is stored in the TOC.
2848   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2849     if (Subtarget.isUsingPCRelativeCalls()) {
2850       SDLoc DL(CP);
2851       EVT Ty = getPointerTy(DAG.getDataLayout());
2852       SDValue ConstPool = DAG.getTargetConstantPool(
2853           C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2854       return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2855     }
2856     setUsesTOCBasePtr(DAG);
2857     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2858     return getTOCEntry(DAG, SDLoc(CP), GA);
2859   }
2860 
2861   unsigned MOHiFlag, MOLoFlag;
2862   bool IsPIC = isPositionIndependent();
2863   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2864 
2865   if (IsPIC && Subtarget.isSVR4ABI()) {
2866     SDValue GA =
2867         DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2868     return getTOCEntry(DAG, SDLoc(CP), GA);
2869   }
2870 
2871   SDValue CPIHi =
2872       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2873   SDValue CPILo =
2874       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2875   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2876 }
2877 
2878 // For 64-bit PowerPC, prefer the more compact relative encodings.
2879 // This trades 32 bits per jump table entry for one or two instructions
2880 // on the jump site.
2881 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2882   if (isJumpTableRelative())
2883     return MachineJumpTableInfo::EK_LabelDifference32;
2884 
2885   return TargetLowering::getJumpTableEncoding();
2886 }
2887 
2888 bool PPCTargetLowering::isJumpTableRelative() const {
2889   if (UseAbsoluteJumpTables)
2890     return false;
2891   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2892     return true;
2893   return TargetLowering::isJumpTableRelative();
2894 }
2895 
2896 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2897                                                     SelectionDAG &DAG) const {
2898   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2899     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2900 
2901   switch (getTargetMachine().getCodeModel()) {
2902   case CodeModel::Small:
2903   case CodeModel::Medium:
2904     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2905   default:
2906     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2907                        getPointerTy(DAG.getDataLayout()));
2908   }
2909 }
2910 
2911 const MCExpr *
2912 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2913                                                 unsigned JTI,
2914                                                 MCContext &Ctx) const {
2915   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2916     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2917 
2918   switch (getTargetMachine().getCodeModel()) {
2919   case CodeModel::Small:
2920   case CodeModel::Medium:
2921     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2922   default:
2923     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2924   }
2925 }
2926 
2927 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2928   EVT PtrVT = Op.getValueType();
2929   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2930 
2931   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2932   if (Subtarget.isUsingPCRelativeCalls()) {
2933     SDLoc DL(JT);
2934     EVT Ty = getPointerTy(DAG.getDataLayout());
2935     SDValue GA =
2936         DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
2937     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2938     return MatAddr;
2939   }
2940 
2941   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2942   // The actual address of the GlobalValue is stored in the TOC.
2943   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2944     setUsesTOCBasePtr(DAG);
2945     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2946     return getTOCEntry(DAG, SDLoc(JT), GA);
2947   }
2948 
2949   unsigned MOHiFlag, MOLoFlag;
2950   bool IsPIC = isPositionIndependent();
2951   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2952 
2953   if (IsPIC && Subtarget.isSVR4ABI()) {
2954     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2955                                         PPCII::MO_PIC_FLAG);
2956     return getTOCEntry(DAG, SDLoc(GA), GA);
2957   }
2958 
2959   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2960   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2961   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2962 }
2963 
2964 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2965                                              SelectionDAG &DAG) const {
2966   EVT PtrVT = Op.getValueType();
2967   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2968   const BlockAddress *BA = BASDN->getBlockAddress();
2969 
2970   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2971   if (Subtarget.isUsingPCRelativeCalls()) {
2972     SDLoc DL(BASDN);
2973     EVT Ty = getPointerTy(DAG.getDataLayout());
2974     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
2975                                            PPCII::MO_PCREL_FLAG);
2976     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2977     return MatAddr;
2978   }
2979 
2980   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2981   // The actual BlockAddress is stored in the TOC.
2982   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2983     setUsesTOCBasePtr(DAG);
2984     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2985     return getTOCEntry(DAG, SDLoc(BASDN), GA);
2986   }
2987 
2988   // 32-bit position-independent ELF stores the BlockAddress in the .got.
2989   if (Subtarget.is32BitELFABI() && isPositionIndependent())
2990     return getTOCEntry(
2991         DAG, SDLoc(BASDN),
2992         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
2993 
2994   unsigned MOHiFlag, MOLoFlag;
2995   bool IsPIC = isPositionIndependent();
2996   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2997   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2998   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2999   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3000 }
3001 
// Lower a thread-local GlobalAddress to the instruction sequence dictated
// by the TLS access model chosen for the global (local-exec, initial-exec,
// general-dynamic, or local-dynamic).
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // Emulated TLS lowers to runtime helper calls instead of TLS relocations.
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);

  // Local-exec: the offset from the thread pointer is a link-time constant;
  // add @tprel@ha / @tprel@l of the global to the thread-pointer register
  // (X13 on 64-bit, R2 on 32-bit).
  if (Model == TLSModel::LocalExec) {
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  // Initial-exec: load the thread-pointer offset from the GOT, then add it
  // to the thread pointer (ADD_TLS with the MO_TLS-flagged address).
  if (Model == TLSModel::InitialExec) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else {
      // 32-bit: pick the GOT pointer according to the PIC level.
      if (!TM.isPositionIndependent())
        GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
      else if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  // General-dynamic: compute the address through a __tls_get_addr-style
  // sequence (ADDIS_TLSGD_HA + ADDI_TLSGD_L_ADDR pseudo).
  if (Model == TLSModel::GeneralDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                   GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  // Local-dynamic: get the module's TLS base once, then add the global's
  // DTPREL offset via an addis/addi pair.
  if (Model == TLSModel::LocalDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
3098 
3099 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3100                                               SelectionDAG &DAG) const {
3101   EVT PtrVT = Op.getValueType();
3102   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3103   SDLoc DL(GSDN);
3104   const GlobalValue *GV = GSDN->getGlobal();
3105 
3106   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3107   // The actual address of the GlobalValue is stored in the TOC.
3108   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3109     if (Subtarget.isUsingPCRelativeCalls()) {
3110       EVT Ty = getPointerTy(DAG.getDataLayout());
3111       if (isAccessedAsGotIndirect(Op)) {
3112         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3113                                                 PPCII::MO_PCREL_FLAG |
3114                                                     PPCII::MO_GOT_FLAG);
3115         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3116         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3117                                    MachinePointerInfo());
3118         return Load;
3119       } else {
3120         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3121                                                 PPCII::MO_PCREL_FLAG);
3122         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3123       }
3124     }
3125     setUsesTOCBasePtr(DAG);
3126     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3127     return getTOCEntry(DAG, DL, GA);
3128   }
3129 
3130   unsigned MOHiFlag, MOLoFlag;
3131   bool IsPIC = isPositionIndependent();
3132   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3133 
3134   if (IsPIC && Subtarget.isSVR4ABI()) {
3135     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3136                                             GSDN->getOffset(),
3137                                             PPCII::MO_PIC_FLAG);
3138     return getTOCEntry(DAG, DL, GA);
3139   }
3140 
3141   SDValue GAHi =
3142     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3143   SDValue GALo =
3144     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3145 
3146   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3147 }
3148 
3149 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3150   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3151   SDLoc dl(Op);
3152 
3153   if (Op.getValueType() == MVT::v2i64) {
3154     // When the operands themselves are v2i64 values, we need to do something
3155     // special because VSX has no underlying comparison operations for these.
3156     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3157       // Equality can be handled by casting to the legal type for Altivec
3158       // comparisons, everything else needs to be expanded.
3159       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3160         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3161                  DAG.getSetCC(dl, MVT::v4i32,
3162                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3163                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3164                    CC));
3165       }
3166 
3167       return SDValue();
3168     }
3169 
3170     // We handle most of these in the usual way.
3171     return Op;
3172   }
3173 
3174   // If we're comparing for equality to zero, expose the fact that this is
3175   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3176   // fold the new nodes.
3177   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3178     return V;
3179 
3180   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3181     // Leave comparisons against 0 and -1 alone for now, since they're usually
3182     // optimized.  FIXME: revisit this when we can custom lower all setcc
3183     // optimizations.
3184     if (C->isAllOnesValue() || C->isNullValue())
3185       return SDValue();
3186   }
3187 
3188   // If we have an integer seteq/setne, turn it into a compare against zero
3189   // by xor'ing the rhs with the lhs, which is faster than setting a
3190   // condition register, reading it back out, and masking the correct bit.  The
3191   // normal approach here uses sub to do this instead of xor.  Using xor exposes
3192   // the result to other bit-twiddling opportunities.
3193   EVT LHSVT = Op.getOperand(0).getValueType();
3194   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3195     EVT VT = Op.getValueType();
3196     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3197                                 Op.getOperand(1));
3198     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3199   }
3200   return SDValue();
3201 }
3202 
// Lower ISD::VAARG for the 32-bit SVR4 ABI.  The va_list layout (see
// LowerVASTART) holds a GPR index byte, an FPR index byte, an
// overflow-area pointer, and a register-save-area pointer; this selects
// between the register save area and the overflow area, updates the
// consumed index/pointer, and loads the argument value.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index: byte 0 of the va_list.
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // i64 arguments occupy an aligned GPR pair.  Check if GprIndex is even.
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  // reg_save_area pointer lives at offset 8, overflow_arg_area at offset 4.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // CC is true when the relevant register index is still < 8, i.e. the
  // argument was passed in a register; otherwise it is in the overflow area.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8 (GPR slots are 4 bytes, FPR slots 8)
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea (after the 8 x 4-byte GPRs)
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  // Store the updated index back into the va_list (byte 0 for GPRs,
  // byte 1 for FPRs).
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // Advance overflow_area by 4/8 in case the argument came from there
  // (i.e. the register index had already reached 8).
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  // Finally, load the argument value from whichever area was selected.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
3301 
3302 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3303   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3304 
3305   // We have to copy the entire va_list struct:
3306   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3307   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3308                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3309                        false, true, false, MachinePointerInfo(),
3310                        MachinePointerInfo());
3311 }
3312 
3313 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3314                                                   SelectionDAG &DAG) const {
3315   if (Subtarget.isAIXABI())
3316     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3317 
3318   return Op.getOperand(0);
3319 }
3320 
3321 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3322                                                 SelectionDAG &DAG) const {
3323   if (Subtarget.isAIXABI())
3324     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3325 
3326   SDValue Chain = Op.getOperand(0);
3327   SDValue Trmp = Op.getOperand(1); // trampoline
3328   SDValue FPtr = Op.getOperand(2); // nested function
3329   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3330   SDLoc dl(Op);
3331 
3332   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3333   bool isPPC64 = (PtrVT == MVT::i64);
3334   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3335 
3336   TargetLowering::ArgListTy Args;
3337   TargetLowering::ArgListEntry Entry;
3338 
3339   Entry.Ty = IntPtrTy;
3340   Entry.Node = Trmp; Args.push_back(Entry);
3341 
3342   // TrampSize == (isPPC64 ? 48 : 40);
3343   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3344                                isPPC64 ? MVT::i64 : MVT::i32);
3345   Args.push_back(Entry);
3346 
3347   Entry.Node = FPtr; Args.push_back(Entry);
3348   Entry.Node = Nest; Args.push_back(Entry);
3349 
3350   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3351   TargetLowering::CallLoweringInfo CLI(DAG);
3352   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3353       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3354       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3355 
3356   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3357   return CallResult.second;
3358 }
3359 
// Lower ISD::VASTART.  On 64-bit and AIX targets, va_list is a plain
// pointer and vastart just stores the varargs frame address; on 32-bit
// SVR4 it initializes the four fields of the va_list struct in order.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Offsets between consecutive fields of the struct, expressed in terms of
  // the pointer size: gpr/fpr bytes, then two pointer-sized members.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
3443 
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX (the 13 parameter-passing FPRs F1-F13).
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
3449 
/// QFPR - The set of QPX registers that should be allocated for arguments
/// (QF1-QF13, mirroring the FPR argument registers).
static const MCPhysReg QFPR[] = {
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3454 
3455 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3456 /// the stack.
3457 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3458                                        unsigned PtrByteSize) {
3459   unsigned ArgSize = ArgVT.getStoreSize();
3460   if (Flags.isByVal())
3461     ArgSize = Flags.getByValSize();
3462 
3463   // Round up to multiples of the pointer size, except for array members,
3464   // which are always packed.
3465   if (!Flags.isInConsecutiveRegs())
3466     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3467 
3468   return ArgSize;
3469 }
3470 
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.  \p ArgVT is the (possibly split) type being passed,
/// \p OrigVT the original full argument type, and \p PtrByteSize the target
/// pointer size in bytes.
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
                                         ISD::ArgFlagsTy Flags,
                                         unsigned PtrByteSize) {
  // By default, arguments are aligned to the pointer size.
  Align Alignment(PtrByteSize);

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
    Alignment = Align(16);
  // QPX vector types stored in double-precision are padded to a 32 byte
  // boundary.
  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
    Alignment = Align(32);

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    auto BVAlign = Flags.getNonZeroByValAlign();
    if (BVAlign > PtrByteSize) {
      // A byval alignment greater than the pointer size must itself be a
      // multiple of the pointer size; anything else is malformed input.
      if (BVAlign.value() % PtrByteSize != 0)
        llvm_unreachable(
            "ByVal alignment is not a multiple of the pointer size");

      Alignment = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type.  (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Alignment = Align(OrigVT.getStoreSize());
    else
      Alignment = Align(ArgVT.getStoreSize());
  }

  return Alignment;
}
3514 
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
///
/// Note the in/out protocol: \p ArgOffset is always advanced past this
/// argument's slot (even when the argument ends up in a register), and the
/// available-register counters are decremented when a register is consumed.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize,
                                   unsigned LinkageSize,
                                   unsigned ParamAreaSize,
                                   unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs, bool HasQPX) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  Align Alignment =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = alignTo(ArgOffset, Alignment);
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
        // QPX registers overlap with the scalar FP registers.
        (HasQPX && (ArgVT == MVT::v4f32 ||
                    ArgVT == MVT::v4f64 ||
                    ArgVT == MVT::v4i1)))
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
3571 
3572 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3573 /// ensure minimum alignment required for target.
3574 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3575                                      unsigned NumBytes) {
3576   return alignTo(NumBytes, Lowering->getStackAlign());
3577 }
3578 
3579 SDValue PPCTargetLowering::LowerFormalArguments(
3580     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3581     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3582     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3583   if (Subtarget.isAIXABI())
3584     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3585                                     InVals);
3586   if (Subtarget.is64BitELFABI())
3587     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3588                                        InVals);
3589   if (Subtarget.is32BitELFABI())
3590     return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3591                                        InVals);
3592 
3593   return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3594                                      InVals);
3595 }
3596 
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  // 32-bit ABI: pointers are 4 bytes.
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  // Under soft-float, pre-analysis records which arguments were originally
  // ppcf128 so the calling-convention routine can handle them specially.
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      // Select the register class matching the value type and the
      // subtarget's available register files (VSX, SPE, QPX, ...).
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f64:
          RC = &PPC::QFRCRegClass;
          break;
        case MVT::v4i1:
          RC = &PPC::QBRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        // SPE doubles occupy two consecutive GPR locations; consume both
        // and rebuild the f64 with BUILD_SPE64.
        assert(i + 1 < e && "No second half of double precision argument");
        unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        // The lo/hi halves arrive in opposite order on big-endian.
        if (!Subtarget.isLittleEndian())
          std::swap (ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        // i1 values live in a full GPR; copy as i32 then truncate back.
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    // Without hardware FP argument registers there is nothing to spill.
    if (useSoftFloat() || hasSPE())
       NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Chain together all register-spill stores emitted above.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
3851 
3852 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3853 // value to MVT::i64 and then truncate to the correct register size.
3854 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3855                                              EVT ObjectVT, SelectionDAG &DAG,
3856                                              SDValue ArgVal,
3857                                              const SDLoc &dl) const {
3858   if (Flags.isSExt())
3859     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3860                          DAG.getValueType(ObjectVT));
3861   else if (Flags.isZExt())
3862     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3863                          DAG.getValueType(ObjectVT));
3864 
3865   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3866 }
3867 
3868 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3869     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3870     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3871     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3872   // TODO: add description of PPC stack frame format, or at least some docs.
3873   //
3874   bool isELFv2ABI = Subtarget.isELFv2ABI();
3875   bool isLittleEndian = Subtarget.isLittleEndian();
3876   MachineFunction &MF = DAG.getMachineFunction();
3877   MachineFrameInfo &MFI = MF.getFrameInfo();
3878   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3879 
3880   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3881          "fastcc not supported on varargs functions");
3882 
3883   EVT PtrVT = getPointerTy(MF.getDataLayout());
3884   // Potential tail calls could cause overwriting of argument stack slots.
3885   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3886                        (CallConv == CallingConv::Fast));
3887   unsigned PtrByteSize = 8;
3888   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3889 
3890   static const MCPhysReg GPR[] = {
3891     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3892     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3893   };
3894   static const MCPhysReg VR[] = {
3895     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3896     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3897   };
3898 
3899   const unsigned Num_GPR_Regs = array_lengthof(GPR);
3900   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3901   const unsigned Num_VR_Regs  = array_lengthof(VR);
3902   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3903 
3904   // Do a first pass over the arguments to determine whether the ABI
3905   // guarantees that our caller has allocated the parameter save area
3906   // on its stack frame.  In the ELFv1 ABI, this is always the case;
3907   // in the ELFv2 ABI, it is true if this is a vararg function or if
3908   // any parameter is located in a stack slot.
3909 
3910   bool HasParameterArea = !isELFv2ABI || isVarArg;
3911   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3912   unsigned NumBytes = LinkageSize;
3913   unsigned AvailableFPRs = Num_FPR_Regs;
3914   unsigned AvailableVRs = Num_VR_Regs;
3915   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3916     if (Ins[i].Flags.isNest())
3917       continue;
3918 
3919     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3920                                PtrByteSize, LinkageSize, ParamAreaSize,
3921                                NumBytes, AvailableFPRs, AvailableVRs,
3922                                Subtarget.hasQPX()))
3923       HasParameterArea = true;
3924   }
3925 
3926   // Add DAG nodes to load the arguments or copy them out of registers.  On
3927   // entry to a function on PPC, the arguments start after the linkage area,
3928   // although the first ones are often in registers.
3929 
3930   unsigned ArgOffset = LinkageSize;
3931   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3932   unsigned &QFPR_idx = FPR_idx;
3933   SmallVector<SDValue, 8> MemOps;
3934   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3935   unsigned CurArgIdx = 0;
3936   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3937     SDValue ArgVal;
3938     bool needsLoad = false;
3939     EVT ObjectVT = Ins[ArgNo].VT;
3940     EVT OrigVT = Ins[ArgNo].ArgVT;
3941     unsigned ObjSize = ObjectVT.getStoreSize();
3942     unsigned ArgSize = ObjSize;
3943     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3944     if (Ins[ArgNo].isOrigArg()) {
3945       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3946       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3947     }
3948     // We re-align the argument offset for each argument, except when using the
3949     // fast calling convention, when we need to make sure we do that only when
3950     // we'll actually use a stack slot.
3951     unsigned CurArgOffset;
3952     Align Alignment;
3953     auto ComputeArgOffset = [&]() {
3954       /* Respect alignment of argument on the stack.  */
3955       Alignment =
3956           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3957       ArgOffset = alignTo(ArgOffset, Alignment);
3958       CurArgOffset = ArgOffset;
3959     };
3960 
3961     if (CallConv != CallingConv::Fast) {
3962       ComputeArgOffset();
3963 
3964       /* Compute GPR index associated with argument offset.  */
3965       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3966       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3967     }
3968 
3969     // FIXME the codegen can be much improved in some cases.
3970     // We do not have to keep everything in memory.
3971     if (Flags.isByVal()) {
3972       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3973 
3974       if (CallConv == CallingConv::Fast)
3975         ComputeArgOffset();
3976 
3977       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3978       ObjSize = Flags.getByValSize();
3979       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3980       // Empty aggregate parameters do not take up registers.  Examples:
3981       //   struct { } a;
3982       //   union  { } b;
3983       //   int c[0];
3984       // etc.  However, we have to provide a place-holder in InVals, so
3985       // pretend we have an 8-byte item at the current address for that
3986       // purpose.
3987       if (!ObjSize) {
3988         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3989         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3990         InVals.push_back(FIN);
3991         continue;
3992       }
3993 
3994       // Create a stack object covering all stack doublewords occupied
3995       // by the argument.  If the argument is (fully or partially) on
3996       // the stack, or if the argument is fully in registers but the
3997       // caller has allocated the parameter save anyway, we can refer
3998       // directly to the caller's stack frame.  Otherwise, create a
3999       // local copy in our own frame.
4000       int FI;
4001       if (HasParameterArea ||
4002           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4003         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4004       else
4005         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4006       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4007 
4008       // Handle aggregates smaller than 8 bytes.
4009       if (ObjSize < PtrByteSize) {
4010         // The value of the object is its address, which differs from the
4011         // address of the enclosing doubleword on big-endian systems.
4012         SDValue Arg = FIN;
4013         if (!isLittleEndian) {
4014           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4015           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4016         }
4017         InVals.push_back(Arg);
4018 
4019         if (GPR_idx != Num_GPR_Regs) {
4020           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4021           FuncInfo->addLiveInAttr(VReg, Flags);
4022           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4023           SDValue Store;
4024 
4025           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4026             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4027                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
4028             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4029                                       MachinePointerInfo(&*FuncArg), ObjType);
4030           } else {
4031             // For sizes that don't fit a truncating store (3, 5, 6, 7),
4032             // store the whole register as-is to the parameter save area
4033             // slot.
4034             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4035                                  MachinePointerInfo(&*FuncArg));
4036           }
4037 
4038           MemOps.push_back(Store);
4039         }
4040         // Whether we copied from a register or not, advance the offset
4041         // into the parameter save area by a full doubleword.
4042         ArgOffset += PtrByteSize;
4043         continue;
4044       }
4045 
4046       // The value of the object is its address, which is the address of
4047       // its first stack doubleword.
4048       InVals.push_back(FIN);
4049 
4050       // Store whatever pieces of the object are in registers to memory.
4051       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4052         if (GPR_idx == Num_GPR_Regs)
4053           break;
4054 
4055         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4056         FuncInfo->addLiveInAttr(VReg, Flags);
4057         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4058         SDValue Addr = FIN;
4059         if (j) {
4060           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4061           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4062         }
4063         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4064                                      MachinePointerInfo(&*FuncArg, j));
4065         MemOps.push_back(Store);
4066         ++GPR_idx;
4067       }
4068       ArgOffset += ArgSize;
4069       continue;
4070     }
4071 
4072     switch (ObjectVT.getSimpleVT().SimpleTy) {
4073     default: llvm_unreachable("Unhandled argument type!");
4074     case MVT::i1:
4075     case MVT::i32:
4076     case MVT::i64:
4077       if (Flags.isNest()) {
4078         // The 'nest' parameter, if any, is passed in R11.
4079         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4080         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4081 
4082         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4083           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4084 
4085         break;
4086       }
4087 
4088       // These can be scalar arguments or elements of an integer array type
4089       // passed directly.  Clang may use those instead of "byval" aggregate
4090       // types to avoid forcing arguments to memory unnecessarily.
4091       if (GPR_idx != Num_GPR_Regs) {
4092         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4093         FuncInfo->addLiveInAttr(VReg, Flags);
4094         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4095 
4096         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4097           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4098           // value to MVT::i64 and then truncate to the correct register size.
4099           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4100       } else {
4101         if (CallConv == CallingConv::Fast)
4102           ComputeArgOffset();
4103 
4104         needsLoad = true;
4105         ArgSize = PtrByteSize;
4106       }
4107       if (CallConv != CallingConv::Fast || needsLoad)
4108         ArgOffset += 8;
4109       break;
4110 
4111     case MVT::f32:
4112     case MVT::f64:
4113       // These can be scalar arguments or elements of a float array type
4114       // passed directly.  The latter are used to implement ELFv2 homogenous
4115       // float aggregates.
4116       if (FPR_idx != Num_FPR_Regs) {
4117         unsigned VReg;
4118 
4119         if (ObjectVT == MVT::f32)
4120           VReg = MF.addLiveIn(FPR[FPR_idx],
4121                               Subtarget.hasP8Vector()
4122                                   ? &PPC::VSSRCRegClass
4123                                   : &PPC::F4RCRegClass);
4124         else
4125           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4126                                                 ? &PPC::VSFRCRegClass
4127                                                 : &PPC::F8RCRegClass);
4128 
4129         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4130         ++FPR_idx;
4131       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4132         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4133         // once we support fp <-> gpr moves.
4134 
4135         // This can only ever happen in the presence of f32 array types,
4136         // since otherwise we never run out of FPRs before running out
4137         // of GPRs.
4138         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4139         FuncInfo->addLiveInAttr(VReg, Flags);
4140         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4141 
4142         if (ObjectVT == MVT::f32) {
4143           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4144             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4145                                  DAG.getConstant(32, dl, MVT::i32));
4146           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4147         }
4148 
4149         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4150       } else {
4151         if (CallConv == CallingConv::Fast)
4152           ComputeArgOffset();
4153 
4154         needsLoad = true;
4155       }
4156 
4157       // When passing an array of floats, the array occupies consecutive
4158       // space in the argument area; only round up to the next doubleword
4159       // at the end of the array.  Otherwise, each float takes 8 bytes.
4160       if (CallConv != CallingConv::Fast || needsLoad) {
4161         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4162         ArgOffset += ArgSize;
4163         if (Flags.isInConsecutiveRegsLast())
4164           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4165       }
4166       break;
4167     case MVT::v4f32:
4168     case MVT::v4i32:
4169     case MVT::v8i16:
4170     case MVT::v16i8:
4171     case MVT::v2f64:
4172     case MVT::v2i64:
4173     case MVT::v1i128:
4174     case MVT::f128:
4175       if (!Subtarget.hasQPX()) {
4176         // These can be scalar arguments or elements of a vector array type
4177         // passed directly.  The latter are used to implement ELFv2 homogenous
4178         // vector aggregates.
4179         if (VR_idx != Num_VR_Regs) {
4180           unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4181           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4182           ++VR_idx;
4183         } else {
4184           if (CallConv == CallingConv::Fast)
4185             ComputeArgOffset();
4186           needsLoad = true;
4187         }
4188         if (CallConv != CallingConv::Fast || needsLoad)
4189           ArgOffset += 16;
4190         break;
4191       } // not QPX
4192 
4193       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
4194              "Invalid QPX parameter type");
4195       LLVM_FALLTHROUGH;
4196 
4197     case MVT::v4f64:
4198     case MVT::v4i1:
4199       // QPX vectors are treated like their scalar floating-point subregisters
4200       // (except that they're larger).
4201       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
4202       if (QFPR_idx != Num_QFPR_Regs) {
4203         const TargetRegisterClass *RC;
4204         switch (ObjectVT.getSimpleVT().SimpleTy) {
4205         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
4206         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
4207         default:         RC = &PPC::QBRCRegClass; break;
4208         }
4209 
4210         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
4211         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4212         ++QFPR_idx;
4213       } else {
4214         if (CallConv == CallingConv::Fast)
4215           ComputeArgOffset();
4216         needsLoad = true;
4217       }
4218       if (CallConv != CallingConv::Fast || needsLoad)
4219         ArgOffset += Sz;
4220       break;
4221     }
4222 
4223     // We need to load the argument to a virtual register if we determined
4224     // above that we ran out of physical registers of the appropriate type.
4225     if (needsLoad) {
4226       if (ObjSize < ArgSize && !isLittleEndian)
4227         CurArgOffset += ArgSize - ObjSize;
4228       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4229       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4230       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4231     }
4232 
4233     InVals.push_back(ArgVal);
4234   }
4235 
4236   // Area that is at least reserved in the caller of this function.
4237   unsigned MinReservedArea;
4238   if (HasParameterArea)
4239     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4240   else
4241     MinReservedArea = LinkageSize;
4242 
4243   // Set the size that is at least reserved in caller of this function.  Tail
4244   // call optimized functions' reserved stack space needs to be aligned so that
4245   // taking the difference between two stack areas will result in an aligned
4246   // stack.
4247   MinReservedArea =
4248       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4249   FuncInfo->setMinReservedArea(MinReservedArea);
4250 
4251   // If the function takes variable number of arguments, make a frame index for
4252   // the start of the first vararg value... for expansion of llvm.va_start.
4253   if (isVarArg) {
4254     int Depth = ArgOffset;
4255 
4256     FuncInfo->setVarArgsFrameIndex(
4257       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4258     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4259 
4260     // If this function is vararg, store any remaining integer argument regs
4261     // to their spots on the stack so that they may be loaded by dereferencing
4262     // the result of va_next.
4263     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4264          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4265       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4266       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4267       SDValue Store =
4268           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4269       MemOps.push_back(Store);
4270       // Increment the address by four for the next argument to store
4271       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4272       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4273     }
4274   }
4275 
4276   if (!MemOps.empty())
4277     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4278 
4279   return Chain;
4280 }
4281 
/// Lower incoming (formal) arguments for the Darwin ABI (32- and 64-bit).
///
/// For each argument in \p Ins, either copies the value out of the physical
/// register it arrives in (GPR/FPR/VR) or marks it for a load from its fixed
/// stack slot, and appends the resulting SDValue to \p InVals in argument
/// order. Also computes the minimum parameter-save area the caller must
/// reserve, and for varargs functions spills the remaining integer argument
/// registers to the stack so va_arg can walk them.
///
/// \returns the (possibly updated) \p Chain, with any register-spill stores
/// merged in via a TokenFactor.
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  // Arguments start immediately after the linkage area.
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  // Soft-float builds pass nothing in FPRs.
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples, but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      // Keep FuncArg in sync with the original IR argument this lowered
      // piece came from; one IR argument can expand to several Ins entries.
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        // A small byval that arrived in a register must be truncating-stored
        // to its (right-justified) slot so its address is meaningful.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of registers: skip past the remainder of the object.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          // Align to 16 bytes, burning GPRs as the offset advances.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4641 
4642 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4643 /// adjusted to accommodate the arguments for the tailcall.
4644 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4645                                    unsigned ParamSize) {
4646 
4647   if (!isTailCall) return 0;
4648 
4649   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4650   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4651   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4652   // Remember only if the new adjustment is bigger.
4653   if (SPDiff < FI->getTailCallSPDelta())
4654     FI->setTailCallSPDelta(SPDiff);
4655 
4656   return SPDiff;
4657 }
4658 
4659 static bool isFunctionGlobalAddress(SDValue Callee);
4660 
static bool
callsShareTOCBase(const Function *Caller, SDValue Callee,
                    const TargetMachine &TM) {
   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
   // don't have enough information to determine if the caller and callee share
   // the same TOC base, so we have to pessimistically assume they don't for
   // correctness.
   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
   if (!G)
     return false;

   const GlobalValue *GV = G->getGlobal();
  // The medium and large code models are expected to provide a sufficiently
  // large TOC to provide all data addressing needs of a module with a
  // single TOC. Since each module will be addressed with a single TOC then we
  // only need to check that caller and callee don't cross dso boundaries.
  if (CodeModel::Medium == TM.getCodeModel() ||
      CodeModel::Large == TM.getCodeModel())
    return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);

  // Otherwise we need to ensure callee and caller are in the same section,
  // since the linker may allocate multiple TOCs, and we don't know which
  // sections will belong to the same TOC base.

  // A non-strong definition may be replaced at link time, so we cannot
  // reason about its final section.
  if (!GV->isStrongDefinitionForLinker())
    return false;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
      GV->getSection() != Caller->getSection())
    return false;
  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  // If the callee might be interposed, then we can't assume the ultimate call
  // target will be in the same section. Even in cases where we can assume that
  // interposition won't happen, in any case where the linker might insert a
  // stub to allow for interposition, we must generate code as though
  // interposition might occur. To understand why this matters, consider a
  // situation where: a -> b -> c where the arrows indicate calls. b and c are
  // in the same section, but a is in a different module (i.e. has a different
  // TOC base pointer). If the linker allows for interposition between b and c,
  // then it will generate a stub for the call edge between b and c which will
  // save the TOC pointer into the designated stack slot allocated by b. If we
  // return true here, and therefore allow a tail call between b and c, that
  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
  // pointer into the stack slot allocated by a (where the a -> b stub saved
  // a's TOC base pointer). If we're not considering a tail call, but rather,
  // whether a nop is needed after the call instruction in b, because the linker
  // will insert a stub, it might complain about a missing nop if we omit it
  // (although many don't complain in this case).
  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
    return false;

  return true;
}
4721 
4722 static bool
4723 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4724                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4725   assert(Subtarget.is64BitELFABI());
4726 
4727   const unsigned PtrByteSize = 8;
4728   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4729 
4730   static const MCPhysReg GPR[] = {
4731     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4732     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4733   };
4734   static const MCPhysReg VR[] = {
4735     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4736     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4737   };
4738 
4739   const unsigned NumGPRs = array_lengthof(GPR);
4740   const unsigned NumFPRs = 13;
4741   const unsigned NumVRs = array_lengthof(VR);
4742   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4743 
4744   unsigned NumBytes = LinkageSize;
4745   unsigned AvailableFPRs = NumFPRs;
4746   unsigned AvailableVRs = NumVRs;
4747 
4748   for (const ISD::OutputArg& Param : Outs) {
4749     if (Param.Flags.isNest()) continue;
4750 
4751     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4752                                PtrByteSize, LinkageSize, ParamAreaSize,
4753                                NumBytes, AvailableFPRs, AvailableVRs,
4754                                Subtarget.hasQPX()))
4755       return true;
4756   }
4757   return false;
4758 }
4759 
4760 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4761   if (CB.arg_size() != CallerFn->arg_size())
4762     return false;
4763 
4764   auto CalleeArgIter = CB.arg_begin();
4765   auto CalleeArgEnd = CB.arg_end();
4766   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4767 
4768   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4769     const Value* CalleeArg = *CalleeArgIter;
4770     const Value* CallerArg = &(*CallerArgIter);
4771     if (CalleeArg == CallerArg)
4772       continue;
4773 
4774     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4775     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4776     //      }
4777     // 1st argument of callee is undef and has the same type as caller.
4778     if (CalleeArg->getType() == CallerArg->getType() &&
4779         isa<UndefValue>(CalleeArg))
4780       continue;
4781 
4782     return false;
4783   }
4784 
4785   return true;
4786 }
4787 
4788 // Returns true if TCO is possible between the callers and callees
4789 // calling conventions.
4790 static bool
4791 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4792                                     CallingConv::ID CalleeCC) {
4793   // Tail calls are possible with fastcc and ccc.
4794   auto isTailCallableCC  = [] (CallingConv::ID CC){
4795       return  CC == CallingConv::C || CC == CallingConv::Fast;
4796   };
4797   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4798     return false;
4799 
4800   // We can safely tail call both fastcc and ccc callees from a c calling
4801   // convention caller. If the caller is fastcc, we may have less stack space
4802   // than a non-fastcc caller with the same signature so disable tail-calls in
4803   // that case.
4804   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4805 }
4806 
/// Determine whether a call lowered under the 64-bit SVR4 (ELF) ABI may be
/// emitted as a tail call (TCO under -tailcallopt, or a sibling call).
///
/// \param Callee   the call target node (GlobalAddress, ExternalSymbol, or
///                 an indirect target).
/// \param CalleeCC calling convention of the callee.
/// \param CB       the originating call site, if available; PC-relative tail
///                 calls may not have one.
/// \param isVarArg true if the callee is variadic (always rejected).
/// \param Outs     lowered outgoing argument descriptions.
/// \param Ins      lowered return/incoming value descriptions.
/// \returns true when every ABI constraint for a tail call is satisfied.
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // -disable-sco suppresses sibling calls, but guaranteed TCO still applies.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  auto &Caller = DAG.getMachineFunction().getFunction();
  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   struct test {
  //     long int a;
  //     char ary[56];
  //   } gTest;
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     b->a = v.a;
  //     return 0;
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (Caller.getCallingConv() != CalleeCC &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(&Caller, *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}
4890 
4891 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4892 /// for tail call optimization. Targets which want to do tail call
4893 /// optimization should implement this function.
4894 bool
4895 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4896                                                      CallingConv::ID CalleeCC,
4897                                                      bool isVarArg,
4898                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4899                                                      SelectionDAG& DAG) const {
4900   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4901     return false;
4902 
4903   // Variable argument functions are not supported.
4904   if (isVarArg)
4905     return false;
4906 
4907   MachineFunction &MF = DAG.getMachineFunction();
4908   CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4909   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4910     // Functions containing by val parameters are not supported.
4911     for (unsigned i = 0; i != Ins.size(); i++) {
4912        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4913        if (Flags.isByVal()) return false;
4914     }
4915 
4916     // Non-PIC/GOT tail calls are supported.
4917     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4918       return true;
4919 
4920     // At the moment we can only do local tail calls (in same module, hidden
4921     // or protected) if we are generating PIC.
4922     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4923       return G->getGlobal()->hasHiddenVisibility()
4924           || G->getGlobal()->hasProtectedVisibility();
4925   }
4926 
4927   return false;
4928 }
4929 
4930 /// isCallCompatibleAddress - Return the immediate to use if the specified
4931 /// 32-bit value is representable in the immediate field of a BxA instruction.
4932 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4933   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4934   if (!C) return nullptr;
4935 
4936   int Addr = C->getZExtValue();
4937   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4938       SignExtend32<26>(Addr) != Addr)
4939     return nullptr;  // Top 6 bits have to be sext of immediate.
4940 
4941   return DAG
4942       .getConstant(
4943           (int)C->getZExtValue() >> 2, SDLoc(Op),
4944           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4945       .getNode();
4946 }
4947 
namespace {

// Records an outgoing call argument together with the fixed stack slot it
// must be written to when lowering a tail call; entries are collected by
// CalculateTailCallArgDest and consumed by StoreTailCallArgumentsToStackSlot.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index node addressing the destination slot.
  int FrameIdx = 0;   // Raw frame index of the destination slot.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
4959 
4960 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4961 static void StoreTailCallArgumentsToStackSlot(
4962     SelectionDAG &DAG, SDValue Chain,
4963     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4964     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4965   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4966     SDValue Arg = TailCallArgs[i].Arg;
4967     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4968     int FI = TailCallArgs[i].FrameIdx;
4969     // Store relative to framepointer.
4970     MemOpChains.push_back(DAG.getStore(
4971         Chain, dl, Arg, FIN,
4972         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4973   }
4974 }
4975 
4976 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4977 /// the appropriate stack slot for the tail call optimized function call.
4978 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4979                                              SDValue OldRetAddr, SDValue OldFP,
4980                                              int SPDiff, const SDLoc &dl) {
4981   if (SPDiff) {
4982     // Calculate the new stack slot for the return address.
4983     MachineFunction &MF = DAG.getMachineFunction();
4984     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4985     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4986     bool isPPC64 = Subtarget.isPPC64();
4987     int SlotSize = isPPC64 ? 8 : 4;
4988     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4989     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4990                                                          NewRetAddrLoc, true);
4991     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4992     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4993     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4994                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4995   }
4996   return Chain;
4997 }
4998 
4999 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5000 /// the position of the argument.
5001 static void
5002 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5003                          SDValue Arg, int SPDiff, unsigned ArgOffset,
5004                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5005   int Offset = ArgOffset + SPDiff;
5006   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5007   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5008   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5009   SDValue FIN = DAG.getFrameIndex(FI, VT);
5010   TailCallArgumentInfo Info;
5011   Info.Arg = Arg;
5012   Info.FrameIdxOp = FIN;
5013   Info.FrameIdx = FI;
5014   TailCallArguments.push_back(Info);
5015 }
5016 
5017 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5018 /// stack slot. Returns the chain as result and the loaded frame pointers in
5019 /// LROpOut/FPOpout. Used when tail calling.
5020 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5021     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5022     SDValue &FPOpOut, const SDLoc &dl) const {
5023   if (SPDiff) {
5024     // Load the LR and FP stack slot for later adjusting.
5025     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5026     LROpOut = getReturnAddrFrameIndex(DAG);
5027     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5028     Chain = SDValue(LROpOut.getNode(), 1);
5029   }
5030   return Chain;
5031 }
5032 
5033 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5034 /// by "Src" to address "Dst" of size "Size".  Alignment information is
5035 /// specified by the specific parameter attribute. The copy will be passed as
5036 /// a byval function parameter.
5037 /// Sometimes what we are copying is the end of a larger object, the part that
5038 /// does not fit in registers.
5039 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5040                                          SDValue Chain, ISD::ArgFlagsTy Flags,
5041                                          SelectionDAG &DAG, const SDLoc &dl) {
5042   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5043   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5044                        Flags.getNonZeroByValAlign(), false, false, false,
5045                        MachinePointerInfo(), MachinePointerInfo());
5046 }
5047 
5048 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5049 /// tail calls.
5050 static void LowerMemOpCallTo(
5051     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5052     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5053     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5054     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5055   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5056   if (!isTailCall) {
5057     if (isVector) {
5058       SDValue StackPtr;
5059       if (isPPC64)
5060         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5061       else
5062         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5063       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5064                            DAG.getConstant(ArgOffset, dl, PtrVT));
5065     }
5066     MemOpChains.push_back(
5067         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5068     // Calculate and remember argument location.
5069   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5070                                   TailCallArguments);
5071 }
5072 
5073 static void
5074 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5075                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5076                 SDValue FPOp,
5077                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5078   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5079   // might overwrite each other in case of tail call optimization.
5080   SmallVector<SDValue, 8> MemOpChains2;
5081   // Do not flag preceding copytoreg stuff together with the following stuff.
5082   InFlag = SDValue();
5083   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5084                                     MemOpChains2, dl);
5085   if (!MemOpChains2.empty())
5086     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5087 
5088   // Store the return address to the appropriate stack slot.
5089   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5090 
5091   // Emit callseq_end just before tailcall node.
5092   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5093                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5094   InFlag = Chain.getValue(1);
5095 }
5096 
5097 // Is this global address that of a function that can be called by name? (as
5098 // opposed to something that must hold a descriptor for an indirect call).
5099 static bool isFunctionGlobalAddress(SDValue Callee) {
5100   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5101     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5102         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5103       return false;
5104 
5105     return G->getGlobal()->getValueType()->isFunctionTy();
5106   }
5107 
5108   return false;
5109 }
5110 
/// LowerCallResult - Copy the result values of a call out of the physical
/// registers assigned by the calling convention, appending the lowered values
/// to InVals. Returns the updated chain.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  // The cold calling convention on SVR4 uses its own return-value convention.
  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      // On SPE an f64 result is split across two i32 registers, so consume
      // two consecutive RVLocs and recombine the halves.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      // The pair is register-order dependent: swap so Lo/Hi match the
      // target's endianness before rebuilding the f64.
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    // Undo any promotion the calling convention applied to the value.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5174 
5175 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5176                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
5177   // PatchPoint calls are not indirect.
5178   if (isPatchPoint)
5179     return false;
5180 
5181   if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5182     return false;
5183 
5184   // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5185   // becuase the immediate function pointer points to a descriptor instead of
5186   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5187   // pointer immediate points to the global entry point, while the BLA would
5188   // need to jump to the local entry point (see rL211174).
5189   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5190       isBLACompatibleAddress(Callee, DAG))
5191     return false;
5192 
5193   return true;
5194 }
5195 
5196 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5197 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5198   return Subtarget.isAIXABI() ||
5199          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5200 }
5201 
5202 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5203                               const Function &Caller,
5204                               const SDValue &Callee,
5205                               const PPCSubtarget &Subtarget,
5206                               const TargetMachine &TM) {
5207   if (CFlags.IsTailCall)
5208     return PPCISD::TC_RETURN;
5209 
5210   // This is a call through a function pointer.
5211   if (CFlags.IsIndirect) {
5212     // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5213     // indirect calls. The save of the caller's TOC pointer to the stack will be
5214     // inserted into the DAG as part of call lowering. The restore of the TOC
5215     // pointer is modeled by using a pseudo instruction for the call opcode that
5216     // represents the 2 instruction sequence of an indirect branch and link,
5217     // immediately followed by a load of the TOC pointer from the the stack save
5218     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5219     // as it is not saved or used.
5220     return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5221                                                : PPCISD::BCTRL;
5222   }
5223 
5224   if (Subtarget.isUsingPCRelativeCalls()) {
5225     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5226     return PPCISD::CALL_NOTOC;
5227   }
5228 
5229   // The ABIs that maintain a TOC pointer accross calls need to have a nop
5230   // immediately following the call instruction if the caller and callee may
5231   // have different TOC bases. At link time if the linker determines the calls
5232   // may not share a TOC base, the call is redirected to a trampoline inserted
5233   // by the linker. The trampoline will (among other things) save the callers
5234   // TOC pointer at an ABI designated offset in the linkage area and the linker
5235   // will rewrite the nop to be a load of the TOC pointer from the linkage area
5236   // into gpr2.
5237   if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5238       return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5239                                                     : PPCISD::CALL_NOP;
5240 
5241   return PPCISD::CALL;
5242 }
5243 
/// Rewrite the callee operand of a direct call into the node form the target
/// actually branches to: an absolute BLA address, a (possibly PLT-decorated)
/// target global/external symbol, or on AIX the "."-prefixed entry-point
/// MCSymbol of the function.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  // If the callee's address fits in the BLA immediate, use it directly.
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    // An ifunc resolves at load time, so it is never considered local here.
    return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
           !dyn_cast_or_null<GlobalIFunc>(GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&
      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;

  // On AIX, direct function calls reference the symbol for the function's
  // entry point, which is named by prepending a "." before the function's
  // C-linkage name.
  const auto getAIXFuncEntryPointSymbolSDNode =
      [&](StringRef FuncName, bool IsDeclaration,
          const XCOFF::StorageClass &SC) {
        auto &Context = DAG.getMachineFunction().getMMI().getContext();

        MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
            Context.getOrCreateSymbol(Twine(".") + Twine(FuncName)));

        if (IsDeclaration && !S->hasRepresentedCsectSet()) {
          // On AIX, an undefined symbol needs to be associated with a
          // MCSectionXCOFF to get the correct storage mapping class.
          // In this case, XCOFF::XMC_PR.
          MCSectionXCOFF *Sec = Context.getXCOFFSection(
              S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
              SectionKind::getMetadata());
          S->setRepresentedCsect(Sec);
        }

        MVT PtrVT =
            DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
        return DAG.getMCSymbol(S, PtrVT);
      };

  if (isFunctionGlobalAddress(Callee)) {
    const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G->getGlobal();

    // Non-AIX targets just need the target form of the global address.
    if (!Subtarget.isAIXABI())
      return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                        UsePlt ? PPCII::MO_PLT : 0);

    assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
    const GlobalObject *GO = cast<GlobalObject>(GV);
    const XCOFF::StorageClass SC =
        TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
    return getAIXFuncEntryPointSymbolSDNode(GO->getName(), GO->isDeclaration(),
                                            SC);
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *SymName = S->getSymbol();
    if (!Subtarget.isAIXABI())
      return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                         UsePlt ? PPCII::MO_PLT : 0);

    // If there exists a user-declared function whose name is the same as the
    // ExternalSymbol's, then we pick up the user-declared version.
    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
    if (const Function *F =
            dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) {
      const XCOFF::StorageClass SC =
          TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F);
      return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(),
                                              SC);
    }

    // Otherwise treat the symbol as an external (C_EXT) declaration.
    return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5334 
5335 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5336   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5337          "Expected a CALLSEQ_STARTSDNode.");
5338 
5339   // The last operand is the chain, except when the node has glue. If the node
5340   // has glue, then the last operand is the glue, and the chain is the second
5341   // last operand.
5342   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5343   if (LastValue.getValueType() != MVT::Glue)
5344     return LastValue;
5345 
5346   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5347 }
5348 
5349 // Creates the node that moves a functions address into the count register
5350 // to prepare for an indirect call instruction.
5351 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5352                                 SDValue &Glue, SDValue &Chain,
5353                                 const SDLoc &dl) {
5354   SDValue MTCTROps[] = {Chain, Callee, Glue};
5355   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5356   Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5357                       makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5358   // The glue is the second value produced.
5359   Glue = Chain.getValue(1);
5360 }
5361 
/// Lower an indirect call through a function descriptor: load the entry
/// point, TOC anchor and environment pointer from the descriptor, copy the
/// latter two into their ABI registers (glued together), and move the entry
/// point into CTR via prepareIndirectCall.
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  // Descriptor loads can be marked dereferenceable/invariant when the
  // subtarget guarantees descriptors never change.
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                      ? (MachineMemOperand::MODereferenceable |
                         MachineMemOperand::MOInvariant)
                      : MachineMemOperand::MONone;

  // Base pointer info from the called operand, when one is available.
  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
  const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
5453 
/// Assemble the operand list for a PPC call node. Operand order is the
/// contract with the call pseudo-instructions: chain, callee (or the TOC
/// restore address / environment / CTR operands for indirect calls), the
/// tail-call SP delta, argument registers, implicit register uses, the
/// register mask, and finally the glue (when valid).
static void
buildCallOperands(SmallVectorImpl<SDValue> &Ops,
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  // First operand is always the chain.
  Ops.push_back(Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
      Ops.push_back(AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
                                    RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
       !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Glue);
}
5536 
// Complete call lowering after argument passing has been set up: choose the
// call opcode, materialize the callee address, build the call node's operand
// list, emit either a tail call (TC_RETURN) or a normal call node, close the
// call frame with CALLSEQ_END, and lower the returned values into InVals.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // TOC-based ABIs (64-bit ELF without PC-relative calls, and AIX) need the
  // TOC base pointer tracked so it can be preserved across the call.
  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget());

  // Direct calls may need the callee node rewritten into a target-specific
  // form; indirect calls load the target into CTR, going through a function
  // descriptor on ABIs that use them.
  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  SmallVector<SDValue, 8> Ops;
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
    // A tail call produces no values; the epilogue/return is folded into the
    // TC_RETURN node, so return it directly without CALLSEQ_END.
    return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
  }

  // Normal call: the node produces a chain and a glue value that orders the
  // subsequent copies-from-reg of the results.
  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                         getTargetMachine().Options.GuaranteedTailCallOpt)
                            ? NumBytes
                            : 0;

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             Glue, dl);
  Glue = Chain.getValue(1);

  // Copy the call results out of their assigned physical registers/locations
  // into virtual registers, appending them to InVals.
  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5605 
// Top-level call lowering entry point. Decides whether the call can be
// tail-called, folds the decision back into CLI.IsTailCall, then dispatches
// to the ABI-specific lowering routine (64-bit SVR4, 32-bit SVR4, AIX, or
// Darwin).
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG                     = CLI.DAG;
  SDLoc &dl                             = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  bool &isTailCall                      = CLI.IsTailCall;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool isVarArg                         = CLI.IsVarArg;
  bool isPatchPoint                     = CLI.IsPatchPoint;
  const CallBase *CB                    = CLI.CB;

  if (isTailCall) {
    // Long calls go through a function pointer, which defeats tail calling
    // unless the source explicitly demanded it with musttail.
    if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
      isTailCall = false;
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall = IsEligibleForTailCallOptimization_64SVR4(
          Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  // musttail is a hard requirement from the front end; failing to honor it
  // is a fatal error rather than a silent fallback.
  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest
      Subtarget.is64BitELFABI() &&
          any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }));

  // Dispatch on ABI; Darwin is the fallback when no other ABI matches.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);

  if (Subtarget.isSVR4ABI())
    return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);

  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5685 
// Lower an outgoing call under the 32-bit SVR4 ABI: assign argument
// locations (with special handling for vararg vector arguments and by-value
// aggregates), open the call frame, copy register arguments and store/record
// stack arguments, set CR bit 6 for vararg calls with FP register args, and
// hand off to FinishCall to emit the call node itself.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  // 32-bit ABI: pointers are 4 bytes.
  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrByteSize);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      // A nonzero Result means the calling-convention function could not
      // handle this argument type.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
             << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        // With SPE an f64 argument is split across two i32 GPRs; this
        // consumes the next ArgLoc as well (note the ++i below).
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                        DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                             SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    // InFlag may be null when no register copies were emitted; in that case
    // only the chain operand is passed.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
5922 
5923 // Copy an argument into memory, being careful to do this outside the
5924 // call sequence for the call to which the argument belongs.
5925 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5926     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5927     SelectionDAG &DAG, const SDLoc &dl) const {
5928   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5929                         CallSeqStart.getNode()->getOperand(0),
5930                         Flags, DAG, dl);
5931   // The MEMCPY must go outside the CALLSEQ_START..END.
5932   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5933   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5934                                                  SDLoc(MemcpyCall));
5935   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5936                          NewCallSeqStart.getNode());
5937   return NewCallSeqStart;
5938 }
5939 
5940 SDValue PPCTargetLowering::LowerCall_64SVR4(
5941     SDValue Chain, SDValue Callee, CallFlags CFlags,
5942     const SmallVectorImpl<ISD::OutputArg> &Outs,
5943     const SmallVectorImpl<SDValue> &OutVals,
5944     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5945     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5946     const CallBase *CB) const {
5947   bool isELFv2ABI = Subtarget.isELFv2ABI();
5948   bool isLittleEndian = Subtarget.isLittleEndian();
5949   unsigned NumOps = Outs.size();
5950   bool IsSibCall = false;
5951   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5952 
5953   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5954   unsigned PtrByteSize = 8;
5955 
5956   MachineFunction &MF = DAG.getMachineFunction();
5957 
5958   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5959     IsSibCall = true;
5960 
5961   // Mark this function as potentially containing a function that contains a
5962   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5963   // and restoring the callers stack pointer in this functions epilog. This is
5964   // done because by tail calling the called function might overwrite the value
5965   // in this function's (MF) stack pointer stack slot 0(SP).
5966   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5967     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5968 
5969   assert(!(IsFastCall && CFlags.IsVarArg) &&
5970          "fastcc not supported on varargs functions");
5971 
5972   // Count how many bytes are to be pushed on the stack, including the linkage
5973   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5974   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5975   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5976   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5977   unsigned NumBytes = LinkageSize;
5978   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5979   unsigned &QFPR_idx = FPR_idx;
5980 
5981   static const MCPhysReg GPR[] = {
5982     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5983     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5984   };
5985   static const MCPhysReg VR[] = {
5986     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5987     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5988   };
5989 
5990   const unsigned NumGPRs = array_lengthof(GPR);
5991   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5992   const unsigned NumVRs  = array_lengthof(VR);
5993   const unsigned NumQFPRs = NumFPRs;
5994 
5995   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5996   // can be passed to the callee in registers.
5997   // For the fast calling convention, there is another check below.
5998   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5999   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6000   if (!HasParameterArea) {
6001     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6002     unsigned AvailableFPRs = NumFPRs;
6003     unsigned AvailableVRs = NumVRs;
6004     unsigned NumBytesTmp = NumBytes;
6005     for (unsigned i = 0; i != NumOps; ++i) {
6006       if (Outs[i].Flags.isNest()) continue;
6007       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6008                                 PtrByteSize, LinkageSize, ParamAreaSize,
6009                                 NumBytesTmp, AvailableFPRs, AvailableVRs,
6010                                 Subtarget.hasQPX()))
6011         HasParameterArea = true;
6012     }
6013   }
6014 
6015   // When using the fast calling convention, we don't provide backing for
6016   // arguments that will be in registers.
6017   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6018 
6019   // Avoid allocating parameter area for fastcc functions if all the arguments
6020   // can be passed in the registers.
6021   if (IsFastCall)
6022     HasParameterArea = false;
6023 
6024   // Add up all the space actually used.
6025   for (unsigned i = 0; i != NumOps; ++i) {
6026     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6027     EVT ArgVT = Outs[i].VT;
6028     EVT OrigVT = Outs[i].ArgVT;
6029 
6030     if (Flags.isNest())
6031       continue;
6032 
6033     if (IsFastCall) {
6034       if (Flags.isByVal()) {
6035         NumGPRsUsed += (Flags.getByValSize()+7)/8;
6036         if (NumGPRsUsed > NumGPRs)
6037           HasParameterArea = true;
6038       } else {
6039         switch (ArgVT.getSimpleVT().SimpleTy) {
6040         default: llvm_unreachable("Unexpected ValueType for argument!");
6041         case MVT::i1:
6042         case MVT::i32:
6043         case MVT::i64:
6044           if (++NumGPRsUsed <= NumGPRs)
6045             continue;
6046           break;
6047         case MVT::v4i32:
6048         case MVT::v8i16:
6049         case MVT::v16i8:
6050         case MVT::v2f64:
6051         case MVT::v2i64:
6052         case MVT::v1i128:
6053         case MVT::f128:
6054           if (++NumVRsUsed <= NumVRs)
6055             continue;
6056           break;
6057         case MVT::v4f32:
6058           // When using QPX, this is handled like a FP register, otherwise, it
6059           // is an Altivec register.
6060           if (Subtarget.hasQPX()) {
6061             if (++NumFPRsUsed <= NumFPRs)
6062               continue;
6063           } else {
6064             if (++NumVRsUsed <= NumVRs)
6065               continue;
6066           }
6067           break;
6068         case MVT::f32:
6069         case MVT::f64:
6070         case MVT::v4f64: // QPX
6071         case MVT::v4i1:  // QPX
6072           if (++NumFPRsUsed <= NumFPRs)
6073             continue;
6074           break;
6075         }
6076         HasParameterArea = true;
6077       }
6078     }
6079 
6080     /* Respect alignment of argument on the stack.  */
6081     auto Alignement =
6082         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6083     NumBytes = alignTo(NumBytes, Alignement);
6084 
6085     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6086     if (Flags.isInConsecutiveRegsLast())
6087       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6088   }
6089 
6090   unsigned NumBytesActuallyUsed = NumBytes;
6091 
6092   // In the old ELFv1 ABI,
6093   // the prolog code of the callee may store up to 8 GPR argument registers to
6094   // the stack, allowing va_start to index over them in memory if its varargs.
6095   // Because we cannot tell if this is needed on the caller side, we have to
6096   // conservatively assume that it is needed.  As such, make sure we have at
6097   // least enough stack space for the caller to store the 8 GPRs.
6098   // In the ELFv2 ABI, we allocate the parameter area iff a callee
6099   // really requires memory operands, e.g. a vararg function.
6100   if (HasParameterArea)
6101     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6102   else
6103     NumBytes = LinkageSize;
6104 
6105   // Tail call needs the stack to be aligned.
6106   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6107     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6108 
6109   int SPDiff = 0;
6110 
6111   // Calculate by how many bytes the stack has to be adjusted in case of tail
6112   // call optimization.
6113   if (!IsSibCall)
6114     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6115 
6116   // To protect arguments on the stack from being clobbered in a tail call,
6117   // force all the loads to happen before doing any other lowering.
6118   if (CFlags.IsTailCall)
6119     Chain = DAG.getStackArgumentTokenFactor(Chain);
6120 
6121   // Adjust the stack pointer for the new arguments...
6122   // These operations are automatically eliminated by the prolog/epilog pass
6123   if (!IsSibCall)
6124     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6125   SDValue CallSeqStart = Chain;
6126 
6127   // Load the return address and frame pointer so it can be move somewhere else
6128   // later.
6129   SDValue LROp, FPOp;
6130   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6131 
6132   // Set up a copy of the stack pointer for use loading and storing any
6133   // arguments that may not fit in the registers available for argument
6134   // passing.
6135   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6136 
6137   // Figure out which arguments are going to go in registers, and which in
6138   // memory.  Also, if this is a vararg function, floating point operations
6139   // must be stored to our stack, and loaded into integer regs as well, if
6140   // any integer regs are available for argument passing.
6141   unsigned ArgOffset = LinkageSize;
6142 
6143   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6144   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6145 
6146   SmallVector<SDValue, 8> MemOpChains;
6147   for (unsigned i = 0; i != NumOps; ++i) {
6148     SDValue Arg = OutVals[i];
6149     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6150     EVT ArgVT = Outs[i].VT;
6151     EVT OrigVT = Outs[i].ArgVT;
6152 
6153     // PtrOff will be used to store the current argument to the stack if a
6154     // register cannot be found for it.
6155     SDValue PtrOff;
6156 
6157     // We re-align the argument offset for each argument, except when using the
6158     // fast calling convention, when we need to make sure we do that only when
6159     // we'll actually use a stack slot.
6160     auto ComputePtrOff = [&]() {
6161       /* Respect alignment of argument on the stack.  */
6162       auto Alignment =
6163           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6164       ArgOffset = alignTo(ArgOffset, Alignment);
6165 
6166       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6167 
6168       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6169     };
6170 
6171     if (!IsFastCall) {
6172       ComputePtrOff();
6173 
6174       /* Compute GPR index associated with argument offset.  */
6175       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6176       GPR_idx = std::min(GPR_idx, NumGPRs);
6177     }
6178 
6179     // Promote integers to 64-bit values.
6180     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6181       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6182       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6183       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6184     }
6185 
6186     // FIXME memcpy is used way more than necessary.  Correctness first.
6187     // Note: "by value" is code for passing a structure by value, not
6188     // basic types.
6189     if (Flags.isByVal()) {
6190       // Note: Size includes alignment padding, so
6191       //   struct x { short a; char b; }
6192       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
6193       // These are the proper values we need for right-justifying the
6194       // aggregate in a parameter register.
6195       unsigned Size = Flags.getByValSize();
6196 
6197       // An empty aggregate parameter takes up no storage and no
6198       // registers.
6199       if (Size == 0)
6200         continue;
6201 
6202       if (IsFastCall)
6203         ComputePtrOff();
6204 
6205       // All aggregates smaller than 8 bytes must be passed right-justified.
6206       if (Size==1 || Size==2 || Size==4) {
6207         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6208         if (GPR_idx != NumGPRs) {
6209           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6210                                         MachinePointerInfo(), VT);
6211           MemOpChains.push_back(Load.getValue(1));
6212           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6213 
6214           ArgOffset += PtrByteSize;
6215           continue;
6216         }
6217       }
6218 
6219       if (GPR_idx == NumGPRs && Size < 8) {
6220         SDValue AddPtr = PtrOff;
6221         if (!isLittleEndian) {
6222           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6223                                           PtrOff.getValueType());
6224           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6225         }
6226         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6227                                                           CallSeqStart,
6228                                                           Flags, DAG, dl);
6229         ArgOffset += PtrByteSize;
6230         continue;
6231       }
6232       // Copy entire object into memory.  There are cases where gcc-generated
6233       // code assumes it is there, even if it could be put entirely into
6234       // registers.  (This is not what the doc says.)
6235 
6236       // FIXME: The above statement is likely due to a misunderstanding of the
6237       // documents.  All arguments must be copied into the parameter area BY
6238       // THE CALLEE in the event that the callee takes the address of any
6239       // formal argument.  That has not yet been implemented.  However, it is
6240       // reasonable to use the stack area as a staging area for the register
6241       // load.
6242 
6243       // Skip this for small aggregates, as we will use the same slot for a
6244       // right-justified copy, below.
6245       if (Size >= 8)
6246         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6247                                                           CallSeqStart,
6248                                                           Flags, DAG, dl);
6249 
6250       // When a register is available, pass a small aggregate right-justified.
6251       if (Size < 8 && GPR_idx != NumGPRs) {
6252         // The easiest way to get this right-justified in a register
6253         // is to copy the structure into the rightmost portion of a
6254         // local variable slot, then load the whole slot into the
6255         // register.
6256         // FIXME: The memcpy seems to produce pretty awful code for
6257         // small aggregates, particularly for packed ones.
6258         // FIXME: It would be preferable to use the slot in the
6259         // parameter save area instead of a new local variable.
6260         SDValue AddPtr = PtrOff;
6261         if (!isLittleEndian) {
6262           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6263           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6264         }
6265         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6266                                                           CallSeqStart,
6267                                                           Flags, DAG, dl);
6268 
6269         // Load the slot into the register.
6270         SDValue Load =
6271             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6272         MemOpChains.push_back(Load.getValue(1));
6273         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6274 
6275         // Done with this argument.
6276         ArgOffset += PtrByteSize;
6277         continue;
6278       }
6279 
6280       // For aggregates larger than PtrByteSize, copy the pieces of the
6281       // object that fit into registers from the parameter save area.
6282       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6283         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6284         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6285         if (GPR_idx != NumGPRs) {
6286           SDValue Load =
6287               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6288           MemOpChains.push_back(Load.getValue(1));
6289           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6290           ArgOffset += PtrByteSize;
6291         } else {
6292           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6293           break;
6294         }
6295       }
6296       continue;
6297     }
6298 
6299     switch (Arg.getSimpleValueType().SimpleTy) {
6300     default: llvm_unreachable("Unexpected ValueType for argument!");
6301     case MVT::i1:
6302     case MVT::i32:
6303     case MVT::i64:
6304       if (Flags.isNest()) {
6305         // The 'nest' parameter, if any, is passed in R11.
6306         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6307         break;
6308       }
6309 
6310       // These can be scalar arguments or elements of an integer array type
6311       // passed directly.  Clang may use those instead of "byval" aggregate
6312       // types to avoid forcing arguments to memory unnecessarily.
6313       if (GPR_idx != NumGPRs) {
6314         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6315       } else {
6316         if (IsFastCall)
6317           ComputePtrOff();
6318 
6319         assert(HasParameterArea &&
6320                "Parameter area must exist to pass an argument in memory.");
6321         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6322                          true, CFlags.IsTailCall, false, MemOpChains,
6323                          TailCallArguments, dl);
6324         if (IsFastCall)
6325           ArgOffset += PtrByteSize;
6326       }
6327       if (!IsFastCall)
6328         ArgOffset += PtrByteSize;
6329       break;
6330     case MVT::f32:
6331     case MVT::f64: {
6332       // These can be scalar arguments or elements of a float array type
6333       // passed directly.  The latter are used to implement ELFv2 homogenous
6334       // float aggregates.
6335 
6336       // Named arguments go into FPRs first, and once they overflow, the
6337       // remaining arguments go into GPRs and then the parameter save area.
6338       // Unnamed arguments for vararg functions always go to GPRs and
6339       // then the parameter save area.  For now, put all arguments to vararg
6340       // routines always in both locations (FPR *and* GPR or stack slot).
6341       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6342       bool NeededLoad = false;
6343 
6344       // First load the argument into the next available FPR.
6345       if (FPR_idx != NumFPRs)
6346         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6347 
6348       // Next, load the argument into GPR or stack slot if needed.
6349       if (!NeedGPROrStack)
6350         ;
6351       else if (GPR_idx != NumGPRs && !IsFastCall) {
6352         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6353         // once we support fp <-> gpr moves.
6354 
6355         // In the non-vararg case, this can only ever happen in the
6356         // presence of f32 array types, since otherwise we never run
6357         // out of FPRs before running out of GPRs.
6358         SDValue ArgVal;
6359 
6360         // Double values are always passed in a single GPR.
6361         if (Arg.getValueType() != MVT::f32) {
6362           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6363 
6364         // Non-array float values are extended and passed in a GPR.
6365         } else if (!Flags.isInConsecutiveRegs()) {
6366           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6367           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6368 
6369         // If we have an array of floats, we collect every odd element
6370         // together with its predecessor into one GPR.
6371         } else if (ArgOffset % PtrByteSize != 0) {
6372           SDValue Lo, Hi;
6373           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6374           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6375           if (!isLittleEndian)
6376             std::swap(Lo, Hi);
6377           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6378 
6379         // The final element, if even, goes into the first half of a GPR.
6380         } else if (Flags.isInConsecutiveRegsLast()) {
6381           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6382           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6383           if (!isLittleEndian)
6384             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6385                                  DAG.getConstant(32, dl, MVT::i32));
6386 
        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
6389         } else
6390           ArgVal = SDValue();
6391 
6392         if (ArgVal.getNode())
6393           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6394       } else {
6395         if (IsFastCall)
6396           ComputePtrOff();
6397 
6398         // Single-precision floating-point values are mapped to the
6399         // second (rightmost) word of the stack doubleword.
6400         if (Arg.getValueType() == MVT::f32 &&
6401             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6402           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6403           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6404         }
6405 
6406         assert(HasParameterArea &&
6407                "Parameter area must exist to pass an argument in memory.");
6408         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6409                          true, CFlags.IsTailCall, false, MemOpChains,
6410                          TailCallArguments, dl);
6411 
6412         NeededLoad = true;
6413       }
6414       // When passing an array of floats, the array occupies consecutive
6415       // space in the argument area; only round up to the next doubleword
6416       // at the end of the array.  Otherwise, each float takes 8 bytes.
6417       if (!IsFastCall || NeededLoad) {
6418         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6419                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6420         if (Flags.isInConsecutiveRegsLast())
6421           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6422       }
6423       break;
6424     }
6425     case MVT::v4f32:
6426     case MVT::v4i32:
6427     case MVT::v8i16:
6428     case MVT::v16i8:
6429     case MVT::v2f64:
6430     case MVT::v2i64:
6431     case MVT::v1i128:
6432     case MVT::f128:
6433       if (!Subtarget.hasQPX()) {
6434       // These can be scalar arguments or elements of a vector array type
6435       // passed directly.  The latter are used to implement ELFv2 homogenous
6436       // vector aggregates.
6437 
6438       // For a varargs call, named arguments go into VRs or on the stack as
6439       // usual; unnamed arguments always go to the stack or the corresponding
6440       // GPRs when within range.  For now, we always put the value in both
6441       // locations (or even all three).
6442       if (CFlags.IsVarArg) {
6443         assert(HasParameterArea &&
6444                "Parameter area must exist if we have a varargs call.");
6445         // We could elide this store in the case where the object fits
6446         // entirely in R registers.  Maybe later.
6447         SDValue Store =
6448             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6449         MemOpChains.push_back(Store);
6450         if (VR_idx != NumVRs) {
6451           SDValue Load =
6452               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6453           MemOpChains.push_back(Load.getValue(1));
6454           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6455         }
6456         ArgOffset += 16;
6457         for (unsigned i=0; i<16; i+=PtrByteSize) {
6458           if (GPR_idx == NumGPRs)
6459             break;
6460           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6461                                    DAG.getConstant(i, dl, PtrVT));
6462           SDValue Load =
6463               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6464           MemOpChains.push_back(Load.getValue(1));
6465           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6466         }
6467         break;
6468       }
6469 
6470       // Non-varargs Altivec params go into VRs or on the stack.
6471       if (VR_idx != NumVRs) {
6472         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6473       } else {
6474         if (IsFastCall)
6475           ComputePtrOff();
6476 
6477         assert(HasParameterArea &&
6478                "Parameter area must exist to pass an argument in memory.");
6479         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6480                          true, CFlags.IsTailCall, true, MemOpChains,
6481                          TailCallArguments, dl);
6482         if (IsFastCall)
6483           ArgOffset += 16;
6484       }
6485 
6486       if (!IsFastCall)
6487         ArgOffset += 16;
6488       break;
6489       } // not QPX
6490 
6491       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
6492              "Invalid QPX parameter type");
6493 
6494       LLVM_FALLTHROUGH;
6495     case MVT::v4f64:
6496     case MVT::v4i1: {
6497       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
6498       if (CFlags.IsVarArg) {
6499         assert(HasParameterArea &&
6500                "Parameter area must exist if we have a varargs call.");
6501         // We could elide this store in the case where the object fits
6502         // entirely in R registers.  Maybe later.
6503         SDValue Store =
6504             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6505         MemOpChains.push_back(Store);
6506         if (QFPR_idx != NumQFPRs) {
6507           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
6508                                      PtrOff, MachinePointerInfo());
6509           MemOpChains.push_back(Load.getValue(1));
6510           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
6511         }
6512         ArgOffset += (IsF32 ? 16 : 32);
6513         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
6514           if (GPR_idx == NumGPRs)
6515             break;
6516           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6517                                    DAG.getConstant(i, dl, PtrVT));
6518           SDValue Load =
6519               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6520           MemOpChains.push_back(Load.getValue(1));
6521           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6522         }
6523         break;
6524       }
6525 
6526       // Non-varargs QPX params go into registers or on the stack.
6527       if (QFPR_idx != NumQFPRs) {
6528         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
6529       } else {
6530         if (IsFastCall)
6531           ComputePtrOff();
6532 
6533         assert(HasParameterArea &&
6534                "Parameter area must exist to pass an argument in memory.");
6535         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6536                          true, CFlags.IsTailCall, true, MemOpChains,
6537                          TailCallArguments, dl);
6538         if (IsFastCall)
6539           ArgOffset += (IsF32 ? 16 : 32);
6540       }
6541 
6542       if (!IsFastCall)
6543         ArgOffset += (IsF32 ? 16 : 32);
6544       break;
6545       }
6546     }
6547   }
6548 
6549   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6550          "mismatch in size of parameter area");
6551   (void)NumBytesActuallyUsed;
6552 
6553   if (!MemOpChains.empty())
6554     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6555 
6556   // Check if this is an indirect call (MTCTR/BCTRL).
6557   // See prepareDescriptorIndirectCall and buildCallOperands for more
6558   // information about calls through function pointers in the 64-bit SVR4 ABI.
6559   if (CFlags.IsIndirect) {
6560     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6561     // caller in the TOC save area.
6562     if (isTOCSaveRestoreRequired(Subtarget)) {
6563       assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6564       // Load r2 into a virtual register and store it to the TOC save area.
6565       setUsesTOCBasePtr(DAG);
6566       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6567       // TOC save area offset.
6568       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6569       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6570       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6571       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6572                            MachinePointerInfo::getStack(
6573                                DAG.getMachineFunction(), TOCSaveOffset));
6574     }
6575     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6576     // This does not mean the MTCTR instruction must use R12; it's easier
6577     // to model this as an extra parameter, so do that.
6578     if (isELFv2ABI && !CFlags.IsPatchPoint)
6579       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6580   }
6581 
6582   // Build a sequence of copy-to-reg nodes chained together with token chain
6583   // and flag operands which copy the outgoing args into the appropriate regs.
6584   SDValue InFlag;
6585   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6586     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6587                              RegsToPass[i].second, InFlag);
6588     InFlag = Chain.getValue(1);
6589   }
6590 
6591   if (CFlags.IsTailCall && !IsSibCall)
6592     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6593                     TailCallArguments);
6594 
6595   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6596                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6597 }
6598 
6599 SDValue PPCTargetLowering::LowerCall_Darwin(
6600     SDValue Chain, SDValue Callee, CallFlags CFlags,
6601     const SmallVectorImpl<ISD::OutputArg> &Outs,
6602     const SmallVectorImpl<SDValue> &OutVals,
6603     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6604     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6605     const CallBase *CB) const {
6606   unsigned NumOps = Outs.size();
6607 
6608   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6609   bool isPPC64 = PtrVT == MVT::i64;
6610   unsigned PtrByteSize = isPPC64 ? 8 : 4;
6611 
6612   MachineFunction &MF = DAG.getMachineFunction();
6613 
  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence, the frame pointer will be used for dynamic
  // stack allocation and for restoring the caller's stack pointer in this
  // function's epilogue. This is done because the tail-called function might
  // overwrite the value in this function's (MF) stack pointer stack slot
  // 0(SP).
6619   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6620       CFlags.CallConv == CallingConv::Fast)
6621     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6622 
6623   // Count how many bytes are to be pushed on the stack, including the linkage
6624   // area, and parameter passing area.  We start with 24/48 bytes, which is
6625   // prereserved space for [SP][CR][LR][3 x unused].
6626   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6627   unsigned NumBytes = LinkageSize;
6628 
6629   // Add up all the space actually used.
6630   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6631   // they all go in registers, but we must reserve stack space for them for
6632   // possible use by the caller.  In varargs or 64-bit calls, parameters are
6633   // assigned stack space in order, with padding so Altivec parameters are
6634   // 16-byte aligned.
6635   unsigned nAltivecParamsAtEnd = 0;
6636   for (unsigned i = 0; i != NumOps; ++i) {
6637     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6638     EVT ArgVT = Outs[i].VT;
6639     // Varargs Altivec parameters are padded to a 16 byte boundary.
6640     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6641         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6642         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6643       if (!CFlags.IsVarArg && !isPPC64) {
6644         // Non-varargs Altivec parameters go after all the non-Altivec
6645         // parameters; handle those later so we know how much padding we need.
6646         nAltivecParamsAtEnd++;
6647         continue;
6648       }
6649       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6650       NumBytes = ((NumBytes+15)/16)*16;
6651     }
6652     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6653   }
6654 
6655   // Allow for Altivec parameters at the end, if needed.
6656   if (nAltivecParamsAtEnd) {
6657     NumBytes = ((NumBytes+15)/16)*16;
6658     NumBytes += 16*nAltivecParamsAtEnd;
6659   }
6660 
  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is varargs.
6663   // Because we cannot tell if this is needed on the caller side, we have to
6664   // conservatively assume that it is needed.  As such, make sure we have at
6665   // least enough stack space for the caller to store the 8 GPRs.
6666   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6667 
6668   // Tail call needs the stack to be aligned.
6669   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6670       CFlags.CallConv == CallingConv::Fast)
6671     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6672 
6673   // Calculate by how many bytes the stack has to be adjusted in case of tail
6674   // call optimization.
6675   int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6676 
6677   // To protect arguments on the stack from being clobbered in a tail call,
6678   // force all the loads to happen before doing any other lowering.
6679   if (CFlags.IsTailCall)
6680     Chain = DAG.getStackArgumentTokenFactor(Chain);
6681 
6682   // Adjust the stack pointer for the new arguments...
6683   // These operations are automatically eliminated by the prolog/epilog pass
6684   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6685   SDValue CallSeqStart = Chain;
6686 
  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
6689   SDValue LROp, FPOp;
6690   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6691 
6692   // Set up a copy of the stack pointer for use loading and storing any
6693   // arguments that may not fit in the registers available for argument
6694   // passing.
6695   SDValue StackPtr;
6696   if (isPPC64)
6697     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6698   else
6699     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6700 
6701   // Figure out which arguments are going to go in registers, and which in
6702   // memory.  Also, if this is a vararg function, floating point operations
6703   // must be stored to our stack, and loaded into integer regs as well, if
6704   // any integer regs are available for argument passing.
6705   unsigned ArgOffset = LinkageSize;
6706   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6707 
6708   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
6709     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6710     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6711   };
6712   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
6713     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6714     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6715   };
6716   static const MCPhysReg VR[] = {
6717     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6718     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6719   };
6720   const unsigned NumGPRs = array_lengthof(GPR_32);
6721   const unsigned NumFPRs = 13;
6722   const unsigned NumVRs  = array_lengthof(VR);
6723 
6724   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6725 
6726   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6727   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6728 
6729   SmallVector<SDValue, 8> MemOpChains;
6730   for (unsigned i = 0; i != NumOps; ++i) {
6731     SDValue Arg = OutVals[i];
6732     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6733 
6734     // PtrOff will be used to store the current argument to the stack if a
6735     // register cannot be found for it.
6736     SDValue PtrOff;
6737 
6738     PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6739 
6740     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6741 
6742     // On PPC64, promote integers to 64-bit values.
6743     if (isPPC64 && Arg.getValueType() == MVT::i32) {
6744       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6745       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6746       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6747     }
6748 
6749     // FIXME memcpy is used way more than necessary.  Correctness first.
6750     // Note: "by value" is code for passing a structure by value, not
6751     // basic types.
6752     if (Flags.isByVal()) {
6753       unsigned Size = Flags.getByValSize();
6754       // Very small objects are passed right-justified.  Everything else is
6755       // passed left-justified.
6756       if (Size==1 || Size==2) {
6757         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6758         if (GPR_idx != NumGPRs) {
6759           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6760                                         MachinePointerInfo(), VT);
6761           MemOpChains.push_back(Load.getValue(1));
6762           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6763 
6764           ArgOffset += PtrByteSize;
6765         } else {
6766           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6767                                           PtrOff.getValueType());
6768           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6769           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6770                                                             CallSeqStart,
6771                                                             Flags, DAG, dl);
6772           ArgOffset += PtrByteSize;
6773         }
6774         continue;
6775       }
6776       // Copy entire object into memory.  There are cases where gcc-generated
6777       // code assumes it is there, even if it could be put entirely into
6778       // registers.  (This is not what the doc says.)
6779       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6780                                                         CallSeqStart,
6781                                                         Flags, DAG, dl);
6782 
6783       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6784       // copy the pieces of the object that fit into registers from the
6785       // parameter save area.
6786       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6787         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6788         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6789         if (GPR_idx != NumGPRs) {
6790           SDValue Load =
6791               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6792           MemOpChains.push_back(Load.getValue(1));
6793           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6794           ArgOffset += PtrByteSize;
6795         } else {
6796           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6797           break;
6798         }
6799       }
6800       continue;
6801     }
6802 
6803     switch (Arg.getSimpleValueType().SimpleTy) {
6804     default: llvm_unreachable("Unexpected ValueType for argument!");
6805     case MVT::i1:
6806     case MVT::i32:
6807     case MVT::i64:
6808       if (GPR_idx != NumGPRs) {
6809         if (Arg.getValueType() == MVT::i1)
6810           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6811 
6812         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6813       } else {
6814         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6815                          isPPC64, CFlags.IsTailCall, false, MemOpChains,
6816                          TailCallArguments, dl);
6817       }
6818       ArgOffset += PtrByteSize;
6819       break;
6820     case MVT::f32:
6821     case MVT::f64:
6822       if (FPR_idx != NumFPRs) {
6823         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6824 
6825         if (CFlags.IsVarArg) {
6826           SDValue Store =
6827               DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6828           MemOpChains.push_back(Store);
6829 
6830           // Float varargs are always shadowed in available integer registers
6831           if (GPR_idx != NumGPRs) {
6832             SDValue Load =
6833                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6834             MemOpChains.push_back(Load.getValue(1));
6835             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6836           }
6837           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6838             SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6839             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6840             SDValue Load =
6841                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6842             MemOpChains.push_back(Load.getValue(1));
6843             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6844           }
6845         } else {
6846           // If we have any FPRs remaining, we may also have GPRs remaining.
6847           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6848           // GPRs.
6849           if (GPR_idx != NumGPRs)
6850             ++GPR_idx;
6851           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6852               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
6853             ++GPR_idx;
6854         }
6855       } else
6856         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6857                          isPPC64, CFlags.IsTailCall, false, MemOpChains,
6858                          TailCallArguments, dl);
6859       if (isPPC64)
6860         ArgOffset += 8;
6861       else
6862         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6863       break;
6864     case MVT::v4f32:
6865     case MVT::v4i32:
6866     case MVT::v8i16:
6867     case MVT::v16i8:
6868       if (CFlags.IsVarArg) {
6869         // These go aligned on the stack, or in the corresponding R registers
6870         // when within range.  The Darwin PPC ABI doc claims they also go in
6871         // V registers; in fact gcc does this only for arguments that are
6872         // prototyped, not for those that match the ...  We do it for all
6873         // arguments, seems to work.
6874         while (ArgOffset % 16 !=0) {
6875           ArgOffset += PtrByteSize;
6876           if (GPR_idx != NumGPRs)
6877             GPR_idx++;
6878         }
6879         // We could elide this store in the case where the object fits
6880         // entirely in R registers.  Maybe later.
6881         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6882                              DAG.getConstant(ArgOffset, dl, PtrVT));
6883         SDValue Store =
6884             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6885         MemOpChains.push_back(Store);
6886         if (VR_idx != NumVRs) {
6887           SDValue Load =
6888               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6889           MemOpChains.push_back(Load.getValue(1));
6890           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6891         }
6892         ArgOffset += 16;
6893         for (unsigned i=0; i<16; i+=PtrByteSize) {
6894           if (GPR_idx == NumGPRs)
6895             break;
6896           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6897                                    DAG.getConstant(i, dl, PtrVT));
6898           SDValue Load =
6899               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6900           MemOpChains.push_back(Load.getValue(1));
6901           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6902         }
6903         break;
6904       }
6905 
6906       // Non-varargs Altivec params generally go in registers, but have
6907       // stack space allocated at the end.
6908       if (VR_idx != NumVRs) {
6909         // Doesn't have GPR space allocated.
6910         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6911       } else if (nAltivecParamsAtEnd==0) {
6912         // We are emitting Altivec params in order.
6913         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6914                          isPPC64, CFlags.IsTailCall, true, MemOpChains,
6915                          TailCallArguments, dl);
6916         ArgOffset += 16;
6917       }
6918       break;
6919     }
6920   }
6921   // If all Altivec parameters fit in registers, as they usually do,
6922   // they get stack space following the non-Altivec parameters.  We
6923   // don't track this here because nobody below needs it.
6924   // If there are more Altivec parameters than fit in registers emit
6925   // the stores here.
6926   if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
6927     unsigned j = 0;
6928     // Offset is aligned; skip 1st 12 params which go in V registers.
6929     ArgOffset = ((ArgOffset+15)/16)*16;
6930     ArgOffset += 12*16;
6931     for (unsigned i = 0; i != NumOps; ++i) {
6932       SDValue Arg = OutVals[i];
6933       EVT ArgType = Outs[i].VT;
6934       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6935           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6936         if (++j > NumVRs) {
6937           SDValue PtrOff;
6938           // We are emitting Altivec params in order.
6939           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6940                            isPPC64, CFlags.IsTailCall, true, MemOpChains,
6941                            TailCallArguments, dl);
6942           ArgOffset += 16;
6943         }
6944       }
6945     }
6946   }
6947 
6948   if (!MemOpChains.empty())
6949     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6950 
6951   // On Darwin, R12 must contain the address of an indirect callee.  This does
6952   // not mean the MTCTR instruction must use R12; it's easier to model this as
6953   // an extra parameter, so do that.
6954   if (CFlags.IsIndirect) {
6955     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
6956     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6957                                                    PPC::R12), Callee));
6958   }
6959 
6960   // Build a sequence of copy-to-reg nodes chained together with token chain
6961   // and flag operands which copy the outgoing args into the appropriate regs.
6962   SDValue InFlag;
6963   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6964     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6965                              RegsToPass[i].second, InFlag);
6966     InFlag = Chain.getValue(1);
6967   }
6968 
6969   if (CFlags.IsTailCall)
6970     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6971                     TailCallArguments);
6972 
6973   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6974                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6975 }
6976 
/// CC_AIX - Custom calling-convention assignment function for the AIX ABI,
/// used for both formal arguments and call operands.  Each value is assigned
/// a register location, a memory location in the parameter save area (PSA),
/// or both, and the locations are recorded in \p State.  Returns false when
/// the argument has been handled; unsupported cases abort via
/// report_fatal_error, so true is never actually returned.
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                   CCState &State) {

  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
      State.getMachineFunction().getSubtarget());
  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  assert((!ValVT.isInteger() ||
          (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
         "Integer argument exceeds register size: should have been legalized");

  // Reject argument classes this lowering does not support yet; a hard error
  // here is preferable to a silent miscompile.
  if (ValVT == MVT::f128)
    report_fatal_error("f128 is unimplemented on AIX.");

  if (ArgFlags.isNest())
    report_fatal_error("Nest arguments are unimplemented.");

  if (ValVT.isVector() || LocVT.isVector())
    report_fatal_error("Vector arguments are unimplemented on AIX.");

  static const MCPhysReg GPR_32[] = {// 32-bit registers.
                                     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                     PPC::R7, PPC::R8, PPC::R9, PPC::R10};
  static const MCPhysReg GPR_64[] = {// 64-bit registers.
                                     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                     PPC::X7, PPC::X8, PPC::X9, PPC::X10};

  if (ArgFlags.isByVal()) {
    if (ArgFlags.getNonZeroByValAlign() > PtrByteSize)
      report_fatal_error("Pass-by-value arguments with alignment greater than "
                         "register width are not supported.");

    const unsigned ByValSize = ArgFlags.getByValSize();

    // An empty aggregate parameter takes up no storage and no registers,
    // but needs a MemLoc for a stack slot for the formal arguments side.
    if (ByValSize == 0) {
      State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                       State.getNextStackOffset(), RegVT,
                                       LocInfo));
      return false;
    }

    // Reserve PSA space for the whole (register-width-aligned) aggregate up
    // front, then hand out GPRs one word at a time until the aggregate or
    // the register file runs out.
    const unsigned StackSize = alignTo(ByValSize, PtrByteSize);
    unsigned Offset = State.AllocateStack(StackSize, PtrByteSize);
    for (const unsigned E = Offset + StackSize; Offset < E;
         Offset += PtrByteSize) {
      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
      else {
        // Out of GPRs: one MemLoc covers the remainder of the aggregate in
        // the already-reserved PSA space.
        State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         LocInfo));
        break;
      }
    }
    return false;
  }

  // Arguments always reserve parameter save area.
  switch (ValVT.SimpleTy) {
  default:
    report_fatal_error("Unhandled value type for argument.");
  case MVT::i64:
    // i64 arguments should have been split to i32 for PPC32.
    assert(IsPPC64 && "PPC32 should have split i64 values.");
    LLVM_FALLTHROUGH;
  case MVT::i1:
  case MVT::i32: {
    const unsigned Offset = State.AllocateStack(PtrByteSize, PtrByteSize);
    // AIX integer arguments are always passed in register width.
    if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
      LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                  : CCValAssign::LocInfo::ZExt;
    if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    else
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));

    return false;
  }
  case MVT::f32:
  case MVT::f64: {
    // Parameter save area (PSA) is reserved even if the float passes in fpr.
    const unsigned StoreSize = LocVT.getStoreSize();
    // Floats are always 4-byte aligned in the PSA on AIX.
    // This includes f64 in 64-bit mode for ABI compatibility.
    const unsigned Offset = State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
    // FPR is a file-scope argument-register table defined elsewhere in this
    // file (presumably F1..F13 — confirm against its definition).
    unsigned FReg = State.AllocateReg(FPR);
    if (FReg)
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

    // Reserve and initialize GPRs or initialize the PSA as required.
    for (unsigned I = 0; I < StoreSize; I += PtrByteSize) {
      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
        assert(FReg && "An FPR should be available when a GPR is reserved.");
        if (State.isVarArg()) {
          // Successfully reserved GPRs are only initialized for vararg calls.
          // Custom handling is required for:
          //   f64 in PPC32 needs to be split into 2 GPRs.
          //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
          State.addLoc(
              CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
        }
      } else {
        // If there are insufficient GPRs, the PSA needs to be initialized.
        // Initialization occurs even if an FPR was initialized for
        // compatibility with the AIX XL compiler. The full memory for the
        // argument will be initialized even if a prior word is saved in GPR.
        // A custom memLoc is used when the argument also passes in FPR so
        // that the callee handling can skip over it easily.
        State.addLoc(
            FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
                                             LocInfo)
                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
        break;
      }
    }

    return false;
  }
  }
  return true;
}
7104 
7105 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7106                                                     bool IsPPC64) {
7107   assert((IsPPC64 || SVT != MVT::i64) &&
7108          "i64 should have been split for 32-bit codegen.");
7109 
7110   switch (SVT) {
7111   default:
7112     report_fatal_error("Unexpected value type for formal argument");
7113   case MVT::i1:
7114   case MVT::i32:
7115   case MVT::i64:
7116     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7117   case MVT::f32:
7118     return &PPC::F4RCRegClass;
7119   case MVT::f64:
7120     return &PPC::F8RCRegClass;
7121   }
7122 }
7123 
7124 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7125                                         SelectionDAG &DAG, SDValue ArgValue,
7126                                         MVT LocVT, const SDLoc &dl) {
7127   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7128   assert(ValVT.getSizeInBits() < LocVT.getSizeInBits());
7129 
7130   if (Flags.isSExt())
7131     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7132                            DAG.getValueType(ValVT));
7133   else if (Flags.isZExt())
7134     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7135                            DAG.getValueType(ValVT));
7136 
7137   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7138 }
7139 
7140 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7141   const unsigned LASize = FL->getLinkageSize();
7142 
7143   if (PPC::GPRCRegClass.contains(Reg)) {
7144     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7145            "Reg must be a valid argument register!");
7146     return LASize + 4 * (Reg - PPC::R3);
7147   }
7148 
7149   if (PPC::G8RCRegClass.contains(Reg)) {
7150     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7151            "Reg must be a valid argument register!");
7152     return LASize + 8 * (Reg - PPC::X3);
7153   }
7154 
7155   llvm_unreachable("Only general purpose registers expected.");
7156 }
7157 
/// Lower the incoming (formal) arguments of a function under the AIX ABI.
/// Register-resident arguments are copied out of their physical registers;
/// memory-resident arguments are loaded from fixed frame objects in the
/// caller's parameter save area.  One SDValue per argument is appended to
/// \p InVals.  Also records the minimum reserved parameter area for this
/// function and, for varargs, spills the remaining GPR argument registers to
/// the stack so va_arg can walk them in memory.
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (getTargetMachine().Options.GuaranteedTailCallOpt)
    report_fatal_error("Tail call support is unimplemented on AIX.");

  if (useSoftFloat())
    report_fatal_error("Soft float support is unimplemented on AIX.");

  const PPCSubtarget &Subtarget =
      static_cast<const PPCSubtarget &>(DAG.getSubtarget());
  if (Subtarget.hasQPX())
    report_fatal_error("QPX support is not supported on AIX.");

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  const EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Reserve space for the linkage area on the stack.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

  SmallVector<SDValue, 8> MemOps;

  // Note: a single argument may own several consecutive entries in ArgLocs
  // (split byvals, f64 in two GPRs), so the index is advanced inside the
  // body rather than in the for-statement.
  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
    CCValAssign &VA = ArgLocs[I++];
    MVT LocVT = VA.getLocVT();
    ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;

    // For compatibility with the AIX XL compiler, the float args in the
    // parameter save area are initialized even if the argument is available
    // in register.  The caller is required to initialize both the register
    // and memory, however, the callee can choose to expect it in either.
    // The memloc is dismissed here because the argument is retrieved from
    // the register.
    if (VA.isMemLoc() && VA.needsCustom())
      continue;

    // ByVal argument that (at least partially) lives in the parameter save
    // area: hand the caller-written stack slot to the body directly.
    if (Flags.isByVal() && VA.isMemLoc()) {
      const unsigned Size =
          alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
                  PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          Size, VA.getLocMemOffset(), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      continue;
    }

    // ByVal argument passed (at least partially) in registers: store the
    // registers back into the shadowed parameter-save-area slots so the body
    // can address the aggregate as ordinary memory.
    if (Flags.isByVal()) {
      assert(VA.isRegLoc() && "MemLocs should already be handled.");

      const MCPhysReg ArgReg = VA.getLocReg();
      const PPCFrameLowering *FL = Subtarget.getFrameLowering();

      if (Flags.getNonZeroByValAlign() > PtrByteSize)
        report_fatal_error("Over aligned byvals not supported yet.");

      const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      // Add live ins for all the RegLocs for the same ByVal.
      const TargetRegisterClass *RegClass =
          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
                                               unsigned Offset) {
        const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
        // Since the callers side has left justified the aggregate in the
        // register, we can simply store the entire register into the stack
        // slot.
        SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
        // The store to the fixedstack object is needed because accessing a
        // field of the ByVal will use a gep and load. Ideally we will optimize
        // to extracting the value from the register directly, and elide the
        // stores when the arguments address is not taken, but that will need to
        // be future work.
        SDValue Store =
            DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom,
                         DAG.getObjectPtrOffset(dl, FIN, Offset),
                         MachinePointerInfo::getFixedStack(MF, FI, Offset));

        MemOps.push_back(Store);
      };

      unsigned Offset = 0;
      HandleRegLoc(VA.getLocReg(), Offset);
      Offset += PtrByteSize;
      // Consume every additional RegLoc that belongs to this same ByVal.
      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
           Offset += PtrByteSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "RegLocs should be for ByVal argument.");

        const CCValAssign RL = ArgLocs[I++];
        HandleRegLoc(RL.getLocReg(), Offset);
      }

      if (Offset != StackSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
        // Consume the MemLoc. The InVal has already been emitted, so nothing
        // more needs to be done.
        ++I;
      }

      continue;
    }

    // Plain scalar in a register.
    EVT ValVT = VA.getValVT();
    if (VA.isRegLoc() && !VA.needsCustom()) {
      MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
      unsigned VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
      // Integers narrower than the register are truncated back to ValVT,
      // keeping the AssertSext/AssertZext implied by the argument flags.
      if (ValVT.isScalarInteger() &&
          (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
        ArgValue =
            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
      }
      InVals.push_back(ArgValue);
      continue;
    }
    // Plain scalar in the parameter save area.
    if (VA.isMemLoc()) {
      const unsigned LocSize = LocVT.getStoreSize();
      const unsigned ValSize = ValVT.getStoreSize();
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      int CurArgOffset = VA.getLocMemOffset();
      // Objects are right-justified because AIX is big-endian.
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      // Potential tail calls could cause overwriting of argument stack slots.
      const bool IsImmutable =
          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
            (CallConv == CallingConv::Fast));
      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      SDValue ArgValue =
          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
      InVals.push_back(ArgValue);
      continue;
    }
  }

  // On AIX a minimum of 8 words is saved to the parameter save area.
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  // Area that is at least reserved in the caller of this function.
  unsigned CallerReservedArea =
      std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  CallerReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setMinReservedArea(CallerReservedArea);

  if (isVarArg) {
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};

    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
    const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex =
             (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
         GPRIndex < NumGPArgRegs; ++GPRIndex) {

      const unsigned VReg =
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address for the next argument to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
7375 
7376 SDValue PPCTargetLowering::LowerCall_AIX(
7377     SDValue Chain, SDValue Callee, CallFlags CFlags,
7378     const SmallVectorImpl<ISD::OutputArg> &Outs,
7379     const SmallVectorImpl<SDValue> &OutVals,
7380     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7381     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7382     const CallBase *CB) const {
7383 
7384   assert((CFlags.CallConv == CallingConv::C ||
7385           CFlags.CallConv == CallingConv::Cold ||
7386           CFlags.CallConv == CallingConv::Fast) &&
7387          "Unexpected calling convention!");
7388 
7389   if (CFlags.IsPatchPoint)
7390     report_fatal_error("This call type is unimplemented on AIX.");
7391 
7392   const PPCSubtarget& Subtarget =
7393       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7394   if (Subtarget.hasQPX())
7395     report_fatal_error("QPX is not supported on AIX.");
7396   if (Subtarget.hasAltivec())
7397     report_fatal_error("Altivec support is unimplemented on AIX.");
7398 
7399   MachineFunction &MF = DAG.getMachineFunction();
7400   SmallVector<CCValAssign, 16> ArgLocs;
7401   CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7402                  *DAG.getContext());
7403 
7404   // Reserve space for the linkage save area (LSA) on the stack.
7405   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7406   //   [SP][CR][LR][2 x reserved][TOC].
7407   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7408   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7409   const bool IsPPC64 = Subtarget.isPPC64();
7410   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7411   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7412   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
7413   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7414 
7415   // The prolog code of the callee may store up to 8 GPR argument registers to
7416   // the stack, allowing va_start to index over them in memory if the callee
7417   // is variadic.
7418   // Because we cannot tell if this is needed on the caller side, we have to
7419   // conservatively assume that it is needed.  As such, make sure we have at
7420   // least enough stack space for the caller to store the 8 GPRs.
7421   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7422   const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7423                                      CCInfo.getNextStackOffset());
7424 
7425   // Adjust the stack pointer for the new arguments...
7426   // These operations are automatically eliminated by the prolog/epilog pass.
7427   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7428   SDValue CallSeqStart = Chain;
7429 
7430   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7431   SmallVector<SDValue, 8> MemOpChains;
7432 
7433   // Set up a copy of the stack pointer for loading and storing any
7434   // arguments that may not fit in the registers available for argument
7435   // passing.
7436   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7437                                    : DAG.getRegister(PPC::R1, MVT::i32);
7438 
7439   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7440     const unsigned ValNo = ArgLocs[I].getValNo();
7441     SDValue Arg = OutVals[ValNo];
7442     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7443 
7444     if (Flags.isByVal()) {
7445       const unsigned ByValSize = Flags.getByValSize();
7446 
7447       // Nothing to do for zero-sized ByVals on the caller side.
7448       if (!ByValSize) {
7449         ++I;
7450         continue;
7451       }
7452 
7453       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7454         return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7455                               (LoadOffset != 0)
7456                                   ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
7457                                   : Arg,
7458                               MachinePointerInfo(), VT);
7459       };
7460 
7461       unsigned LoadOffset = 0;
7462 
7463       // Initialize registers, which are fully occupied by the by-val argument.
7464       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7465         SDValue Load = GetLoad(PtrVT, LoadOffset);
7466         MemOpChains.push_back(Load.getValue(1));
7467         LoadOffset += PtrByteSize;
7468         const CCValAssign &ByValVA = ArgLocs[I++];
7469         assert(ByValVA.getValNo() == ValNo &&
7470                "Unexpected location for pass-by-value argument.");
7471         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7472       }
7473 
7474       if (LoadOffset == ByValSize)
7475         continue;
7476 
7477       // There must be one more loc to handle the remainder.
7478       assert(ArgLocs[I].getValNo() == ValNo &&
7479              "Expected additional location for by-value argument.");
7480 
7481       if (ArgLocs[I].isMemLoc()) {
7482         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7483         const CCValAssign &ByValVA = ArgLocs[I++];
7484         ISD::ArgFlagsTy MemcpyFlags = Flags;
7485         // Only memcpy the bytes that don't pass in register.
7486         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7487         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7488             (LoadOffset != 0) ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
7489                               : Arg,
7490             DAG.getObjectPtrOffset(dl, StackPtr, ByValVA.getLocMemOffset()),
7491             CallSeqStart, MemcpyFlags, DAG, dl);
7492         continue;
7493       }
7494 
7495       // Initialize the final register residue.
7496       // Any residue that occupies the final by-val arg register must be
7497       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7498       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7499       // 2 and 1 byte loads.
7500       const unsigned ResidueBytes = ByValSize % PtrByteSize;
7501       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7502              "Unexpected register residue for by-value argument.");
7503       SDValue ResidueVal;
7504       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7505         const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7506         const MVT VT =
7507             N == 1 ? MVT::i8
7508                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7509         SDValue Load = GetLoad(VT, LoadOffset);
7510         MemOpChains.push_back(Load.getValue(1));
7511         LoadOffset += N;
7512         Bytes += N;
7513 
7514         // By-val arguments are passed left-justfied in register.
7515         // Every load here needs to be shifted, otherwise a full register load
7516         // should have been used.
7517         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7518                "Unexpected load emitted during handling of pass-by-value "
7519                "argument.");
7520         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7521         EVT ShiftAmountTy =
7522             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7523         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7524         SDValue ShiftedLoad =
7525             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7526         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7527                                               ShiftedLoad)
7528                                 : ShiftedLoad;
7529       }
7530 
7531       const CCValAssign &ByValVA = ArgLocs[I++];
7532       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7533       continue;
7534     }
7535 
7536     CCValAssign &VA = ArgLocs[I++];
7537     const MVT LocVT = VA.getLocVT();
7538     const MVT ValVT = VA.getValVT();
7539 
7540     switch (VA.getLocInfo()) {
7541     default:
7542       report_fatal_error("Unexpected argument extension type.");
7543     case CCValAssign::Full:
7544       break;
7545     case CCValAssign::ZExt:
7546       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7547       break;
7548     case CCValAssign::SExt:
7549       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7550       break;
7551     }
7552 
7553     if (VA.isRegLoc() && !VA.needsCustom()) {
7554       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7555       continue;
7556     }
7557 
7558     if (VA.isMemLoc()) {
7559       SDValue PtrOff =
7560           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7561       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7562       MemOpChains.push_back(
7563           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7564 
7565       continue;
7566     }
7567 
7568     // Custom handling is used for GPR initializations for vararg float
7569     // arguments.
7570     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7571            ValVT.isFloatingPoint() && LocVT.isInteger() &&
7572            "Unexpected register handling for calling convention.");
7573 
7574     SDValue ArgAsInt =
7575         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7576 
7577     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7578       // f32 in 32-bit GPR
7579       // f64 in 64-bit GPR
7580       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7581     else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
7582       // f32 in 64-bit GPR.
7583       RegsToPass.push_back(std::make_pair(
7584           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7585     else {
7586       // f64 in two 32-bit GPRs
7587       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7588       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7589              "Unexpected custom register for argument!");
7590       CCValAssign &GPR1 = VA;
7591       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7592                                      DAG.getConstant(32, dl, MVT::i8));
7593       RegsToPass.push_back(std::make_pair(
7594           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7595 
7596       if (I != E) {
7597         // If only 1 GPR was available, there will only be one custom GPR and
7598         // the argument will also pass in memory.
7599         CCValAssign &PeekArg = ArgLocs[I];
7600         if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7601           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7602           CCValAssign &GPR2 = ArgLocs[I++];
7603           RegsToPass.push_back(std::make_pair(
7604               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7605         }
7606       }
7607     }
7608   }
7609 
7610   if (!MemOpChains.empty())
7611     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7612 
7613   // For indirect calls, we need to save the TOC base to the stack for
7614   // restoration after the call.
7615   if (CFlags.IsIndirect) {
7616     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7617     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7618     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7619     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7620     const unsigned TOCSaveOffset =
7621         Subtarget.getFrameLowering()->getTOCSaveOffset();
7622 
7623     setUsesTOCBasePtr(DAG);
7624     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7625     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7626     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7627     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7628     Chain = DAG.getStore(
7629         Val.getValue(1), dl, Val, AddPtr,
7630         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7631   }
7632 
7633   // Build a sequence of copy-to-reg nodes chained together with token chain
7634   // and flag operands which copy the outgoing args into the appropriate regs.
7635   SDValue InFlag;
7636   for (auto Reg : RegsToPass) {
7637     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7638     InFlag = Chain.getValue(1);
7639   }
7640 
7641   const int SPDiff = 0;
7642   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7643                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7644 }
7645 
7646 bool
7647 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7648                                   MachineFunction &MF, bool isVarArg,
7649                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7650                                   LLVMContext &Context) const {
7651   SmallVector<CCValAssign, 16> RVLocs;
7652   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7653   return CCInfo.CheckReturn(
7654       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7655                 ? RetCC_PPC_Cold
7656                 : RetCC_PPC);
7657 }
7658 
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  // Assign each return value its location. All locations must be registers
  // (asserted below); memory returns are handled via sret demotion earlier.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  // Cold calls on SVR4 use a dedicated return convention.
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           ? RetCC_PPC_Cold
                           : RetCC_PPC);

  // Flag (glue) chains the CopyToReg nodes so the register copies stay
  // adjacent to the return.
  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  // RealResIdx indexes OutVals; it lags behind i whenever one value occupies
  // two RVLocs (the SPE f64-in-two-GPRs case below consumes an extra loc).
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    // Promote the value to the width of its assigned location if required.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      // EXTRACT_SPE pulls one 32-bit half; the index picks the half that
      // belongs in the first (lower-numbered) return register for this
      // endianness.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    // Glue this copy to the next one (or to the final RET_FLAG).
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
7723 
7724 SDValue
7725 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7726                                                 SelectionDAG &DAG) const {
7727   SDLoc dl(Op);
7728 
7729   // Get the correct type for integers.
7730   EVT IntVT = Op.getValueType();
7731 
7732   // Get the inputs.
7733   SDValue Chain = Op.getOperand(0);
7734   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7735   // Build a DYNAREAOFFSET node.
7736   SDValue Ops[2] = {Chain, FPSIdx};
7737   SDVTList VTs = DAG.getVTList(IntVT);
7738   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7739 }
7740 
7741 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7742                                              SelectionDAG &DAG) const {
7743   // When we pop the dynamic allocation we need to restore the SP link.
7744   SDLoc dl(Op);
7745 
7746   // Get the correct type for pointers.
7747   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7748 
7749   // Construct the stack pointer operand.
7750   bool isPPC64 = Subtarget.isPPC64();
7751   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7752   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7753 
7754   // Get the operands for the STACKRESTORE.
7755   SDValue Chain = Op.getOperand(0);
7756   SDValue SaveSP = Op.getOperand(1);
7757 
7758   // Load the old link SP.
7759   SDValue LoadLinkSP =
7760       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7761 
7762   // Restore the stack pointer.
7763   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7764 
7765   // Store the old link SP.
7766   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7767 }
7768 
7769 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7770   MachineFunction &MF = DAG.getMachineFunction();
7771   bool isPPC64 = Subtarget.isPPC64();
7772   EVT PtrVT = getPointerTy(MF.getDataLayout());
7773 
7774   // Get current frame pointer save index.  The users of this index will be
7775   // primarily DYNALLOC instructions.
7776   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7777   int RASI = FI->getReturnAddrSaveIndex();
7778 
7779   // If the frame pointer save index hasn't been defined yet.
7780   if (!RASI) {
7781     // Find out what the fix offset of the frame pointer save area.
7782     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7783     // Allocate the frame index for frame pointer save area.
7784     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7785     // Save the result.
7786     FI->setReturnAddrSaveIndex(RASI);
7787   }
7788   return DAG.getFrameIndex(RASI, PtrVT);
7789 }
7790 
7791 SDValue
7792 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7793   MachineFunction &MF = DAG.getMachineFunction();
7794   bool isPPC64 = Subtarget.isPPC64();
7795   EVT PtrVT = getPointerTy(MF.getDataLayout());
7796 
7797   // Get current frame pointer save index.  The users of this index will be
7798   // primarily DYNALLOC instructions.
7799   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7800   int FPSI = FI->getFramePointerSaveIndex();
7801 
7802   // If the frame pointer save index hasn't been defined yet.
7803   if (!FPSI) {
7804     // Find out what the fix offset of the frame pointer save area.
7805     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7806     // Allocate the frame index for frame pointer save area.
7807     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7808     // Save the result.
7809     FI->setFramePointerSaveIndex(FPSI);
7810   }
7811   return DAG.getFrameIndex(FPSI, PtrVT);
7812 }
7813 
7814 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7815                                                    SelectionDAG &DAG) const {
7816   // Get the inputs.
7817   SDValue Chain = Op.getOperand(0);
7818   SDValue Size  = Op.getOperand(1);
7819   SDLoc dl(Op);
7820 
7821   // Get the correct type for pointers.
7822   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7823   // Negate the size.
7824   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7825                                 DAG.getConstant(0, dl, PtrVT), Size);
7826   // Construct a node for the frame pointer save index.
7827   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7828   // Build a DYNALLOC node.
7829   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7830   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7831   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7832 }
7833 
7834 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7835                                                      SelectionDAG &DAG) const {
7836   MachineFunction &MF = DAG.getMachineFunction();
7837 
7838   bool isPPC64 = Subtarget.isPPC64();
7839   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7840 
7841   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7842   return DAG.getFrameIndex(FI, PtrVT);
7843 }
7844 
7845 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7846                                                SelectionDAG &DAG) const {
7847   SDLoc DL(Op);
7848   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7849                      DAG.getVTList(MVT::i32, MVT::Other),
7850                      Op.getOperand(0), Op.getOperand(1));
7851 }
7852 
7853 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7854                                                 SelectionDAG &DAG) const {
7855   SDLoc DL(Op);
7856   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7857                      Op.getOperand(0), Op.getOperand(1));
7858 }
7859 
7860 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7861   if (Op.getValueType().isVector())
7862     return LowerVectorLoad(Op, DAG);
7863 
7864   assert(Op.getValueType() == MVT::i1 &&
7865          "Custom lowering only for i1 loads");
7866 
7867   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7868 
7869   SDLoc dl(Op);
7870   LoadSDNode *LD = cast<LoadSDNode>(Op);
7871 
7872   SDValue Chain = LD->getChain();
7873   SDValue BasePtr = LD->getBasePtr();
7874   MachineMemOperand *MMO = LD->getMemOperand();
7875 
7876   SDValue NewLD =
7877       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7878                      BasePtr, MVT::i8, MMO);
7879   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7880 
7881   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7882   return DAG.getMergeValues(Ops, dl);
7883 }
7884 
7885 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7886   if (Op.getOperand(1).getValueType().isVector())
7887     return LowerVectorStore(Op, DAG);
7888 
7889   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7890          "Custom lowering only for i1 stores");
7891 
7892   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7893 
7894   SDLoc dl(Op);
7895   StoreSDNode *ST = cast<StoreSDNode>(Op);
7896 
7897   SDValue Chain = ST->getChain();
7898   SDValue BasePtr = ST->getBasePtr();
7899   SDValue Value = ST->getValue();
7900   MachineMemOperand *MMO = ST->getMemOperand();
7901 
7902   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7903                       Value);
7904   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7905 }
7906 
7907 // FIXME: Remove this once the ANDI glue bug is fixed:
7908 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7909   assert(Op.getValueType() == MVT::i1 &&
7910          "Custom lowering only for i1 results");
7911 
7912   SDLoc DL(Op);
7913   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7914 }
7915 
7916 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7917                                                SelectionDAG &DAG) const {
7918 
7919   // Implements a vector truncate that fits in a vector register as a shuffle.
7920   // We want to legalize vector truncates down to where the source fits in
7921   // a vector register (and target is therefore smaller than vector register
7922   // size).  At that point legalization will try to custom lower the sub-legal
7923   // result and get here - where we can contain the truncate as a single target
7924   // operation.
7925 
7926   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7927   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7928   //
7929   // We will implement it for big-endian ordering as this (where x denotes
7930   // undefined):
7931   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7932   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7933   //
7934   // The same operation in little-endian ordering will be:
7935   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7936   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7937 
7938   assert(Op.getValueType().isVector() && "Vector type expected.");
7939 
7940   SDLoc DL(Op);
7941   SDValue N1 = Op.getOperand(0);
7942   unsigned SrcSize = N1.getValueType().getSizeInBits();
7943   assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
7944   SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7945 
7946   EVT TrgVT = Op.getValueType();
7947   unsigned TrgNumElts = TrgVT.getVectorNumElements();
7948   EVT EltVT = TrgVT.getVectorElementType();
7949   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7950   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7951 
7952   // First list the elements we want to keep.
7953   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7954   SmallVector<int, 16> ShuffV;
7955   if (Subtarget.isLittleEndian())
7956     for (unsigned i = 0; i < TrgNumElts; ++i)
7957       ShuffV.push_back(i * SizeMult);
7958   else
7959     for (unsigned i = 1; i <= TrgNumElts; ++i)
7960       ShuffV.push_back(i * SizeMult - 1);
7961 
7962   // Populate the remaining elements with undefs.
7963   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7964     // ShuffV.push_back(i + WideNumElts);
7965     ShuffV.push_back(WideNumElts + 1);
7966 
7967   SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
7968   return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
7969 }
7970 
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
/// fsel selects on the sign of its first operand: FSEL(A, B, C) = A >= 0 ? B
/// : C, so each condition code is rewritten into a "compare against zero"
/// form (either using the RHS==0.0 fast path or an explicit FSUB).
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);
  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxcdp/xsmincdp which are OK to emit even in the
  // presence of infinities.
  // This only fires when the select operands mirror the compare operands,
  // i.e. the select is really a min or max of LHS and RHS.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      LLVM_FALLTHROUGH;
    case ISD::SETEQ:
      // LHS == 0 is emulated as (LHS >= 0) && (-LHS >= 0) via two fsels.
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOLE:
    case ISD::SETLE:
      // LHS <= 0 is handled as -LHS >= 0.
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: materialize LHS - RHS (or RHS - LHS) and select on its
  // sign. This is only valid under the finite-math restrictions checked
  // above.
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    LLVM_FALLTHROUGH;
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}
8091 
// Lower FP_TO_SINT/FP_TO_UINT by converting in an FP register and spilling
// the result to a stack slot; RLI is filled in so callers can emit (and
// possibly reuse) the load from that slot themselves.
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  // The fcti* conversion instructions all operate on f64.
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT there is no unsigned word convert, so fall back to the
    // doubleword convert and take the low 32 bits.
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  // Use a 4-byte slot with stfiwx when available; otherwise spill the whole
  // f64 and load the relevant word below.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
  if (i32Stack) {
    // stfiwx stores the low 32 bits of the FP register as an integer word;
    // it must be emitted as a memory intrinsic so the MMO is tracked.
    MachineFunction &MF = DAG.getMachineFunction();
    Alignment = Align(4);
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI, Alignment);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  // NOTE(review): the pointer is biased unconditionally while the MPI offset
  // is endian-dependent; this path appears to be reached only on big-endian
  // subtargets (LE implies FPCVT, which makes i32Stack true) — confirm.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  // Hand everything needed for the final load back to the caller.
  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
  RLI.Alignment = Alignment;
}
8154 
8155 /// Custom lowers floating point to integer conversions to use
8156 /// the direct move instructions available in ISA 2.07 to avoid the
8157 /// need for load/store combinations.
8158 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8159                                                     SelectionDAG &DAG,
8160                                                     const SDLoc &dl) const {
8161   assert(Op.getOperand(0).getValueType().isFloatingPoint());
8162   SDValue Src = Op.getOperand(0);
8163 
8164   if (Src.getValueType() == MVT::f32)
8165     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8166 
8167   SDValue Tmp;
8168   switch (Op.getSimpleValueType().SimpleTy) {
8169   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8170   case MVT::i32:
8171     Tmp = DAG.getNode(
8172         Op.getOpcode() == ISD::FP_TO_SINT
8173             ? PPCISD::FCTIWZ
8174             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
8175         dl, MVT::f64, Src);
8176     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
8177     break;
8178   case MVT::i64:
8179     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
8180            "i64 FP_TO_UINT is supported only with FPCVT");
8181     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
8182                                                         PPCISD::FCTIDUZ,
8183                       dl, MVT::f64, Src);
8184     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
8185     break;
8186   }
8187   return Tmp;
8188 }
8189 
// Top-level FP->int lowering: f128 stays legal, ppcf128 is expanded by hand,
// and everything else goes through direct moves (ISA 2.07+) or a stack slot.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {

  // FP to INT conversions are legal for f128.
  if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
    return Op;

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
    if (Op.getValueType() == MVT::i32) {
      if (Op.getOpcode() == ISD::FP_TO_SINT) {
        // Split the double-double into its two f64 halves.
        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(0, dl));
        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(1, dl));

        // Add the two halves of the long double in round-to-zero mode.
        SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

        // Now use a smaller FP_TO_SINT.
        return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
      }
      if (Op.getOpcode() == ISD::FP_TO_UINT) {
        // 2^31 as a ppcf128 constant, used to split the unsigned range in
        // two so each half fits in a signed conversion.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
        //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
        // FIXME: generated code sucks.
        // TODO: Are there fast-math-flags to propagate to this FSUB?
        SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
                                   Op.getOperand(0), Tmp);
        True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
        True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
                           DAG.getConstant(0x80000000, dl, MVT::i32));
        SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
                                    Op.getOperand(0));
        return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
                               ISD::SETGE);
      }
    }

    // ppcf128 -> i64 (and everything else) is left to the default expander.
    return SDValue();
  }

  // Prefer a register-to-register move of the converted value when the
  // subtarget has direct moves (ISA 2.07, 64-bit).
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Otherwise convert through a stack slot and load the integer back.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
8246 
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
//
// Returns true and fills in RLI when the address (and memory operands) of an
// existing load — or of the stack slot produced by LowerFP_TO_INTForReuse —
// can be reused for a new load of MemVT with extension type ET.
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  // FP_TO_UINT is only reusable here when the conversion itself is valid
  // without a libcall: either FPCVT is available or the result is i32.
  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
                       (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
  if (ET == ISD::NON_EXTLOAD &&
      (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    // Materialize the conversion through a stack slot; RLI then describes
    // that slot.
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  // Otherwise Op must itself be a plain, matching load we can piggyback on.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    // Fold the pre-increment offset into the reused pointer.
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  // Copy over everything the new load needs to alias correctly with LD.
  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  // Record LD's chain result (result 2 for indexed loads) so the caller can
  // splice the new load in as a peer of LD (see spliceIntoChain).
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
8297 
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  // Nothing to splice when no old chain was recorded (e.g. the stack-slot
  // path of canReuseLoadAddress).
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  // Build the token factor with an UNDEF placeholder first: if ResChain were
  // an operand now, ReplaceAllUsesOfValueWith below would rewrite the token
  // factor's own operand and create a cycle.
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  // Point all users of the old chain at the token factor, then patch the
  // placeholder so the token factor itself still depends on the old chain.
  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
8318 
8319 /// Analyze profitability of direct move
8320 /// prefer float load to int load plus direct move
8321 /// when there is no integer use of int load
8322 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8323   SDNode *Origin = Op.getOperand(0).getNode();
8324   if (Origin->getOpcode() != ISD::LOAD)
8325     return true;
8326 
8327   // If there is no LXSIBZX/LXSIHZX, like Power8,
8328   // prefer direct move if the memory size is 1 or 2 bytes.
8329   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8330   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8331     return true;
8332 
8333   for (SDNode::use_iterator UI = Origin->use_begin(),
8334                             UE = Origin->use_end();
8335        UI != UE; ++UI) {
8336 
8337     // Only look at the users of the loaded value.
8338     if (UI.getUse().get().getResNo() != 0)
8339       continue;
8340 
8341     if (UI->getOpcode() != ISD::SINT_TO_FP &&
8342         UI->getOpcode() != ISD::UINT_TO_FP)
8343       return true;
8344   }
8345 
8346   return false;
8347 }
8348 
8349 /// Custom lowers integer to floating point conversions to use
8350 /// the direct move instructions available in ISA 2.07 to avoid the
8351 /// need for load/store combinations.
8352 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8353                                                     SelectionDAG &DAG,
8354                                                     const SDLoc &dl) const {
8355   assert((Op.getValueType() == MVT::f32 ||
8356           Op.getValueType() == MVT::f64) &&
8357          "Invalid floating point type as target of conversion");
8358   assert(Subtarget.hasFPCVT() &&
8359          "Int to FP conversions with direct moves require FPCVT");
8360   SDValue FP;
8361   SDValue Src = Op.getOperand(0);
8362   bool SinglePrec = Op.getValueType() == MVT::f32;
8363   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8364   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
8365   unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
8366                              (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
8367 
8368   if (WordInt) {
8369     FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
8370                      dl, MVT::f64, Src);
8371     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
8372   }
8373   else {
8374     FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
8375     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
8376   }
8377 
8378   return FP;
8379 }
8380 
8381 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8382 
8383   EVT VecVT = Vec.getValueType();
8384   assert(VecVT.isVector() && "Expected a vector type.");
8385   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8386 
8387   EVT EltVT = VecVT.getVectorElementType();
8388   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8389   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8390 
8391   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8392   SmallVector<SDValue, 16> Ops(NumConcat);
8393   Ops[0] = Vec;
8394   SDValue UndefVec = DAG.getUNDEF(VecVT);
8395   for (unsigned i = 1; i < NumConcat; ++i)
8396     Ops[i] = UndefVec;
8397 
8398   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8399 }
8400 
/// Lower a vector int-to-fp conversion (to v2f64 or v4f32) by widening the
/// integer input to 128 bits, shuffling the elements into the lane positions
/// the conversion instruction expects, extending to the intermediate integer
/// type, and finally emitting the (now legal) vector conversion.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {

  unsigned Opc = Op.getOpcode();
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  bool SignedConv = Opc == ISD::SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  // Widen the (narrow) integer input to a full 128-bit vector.
  SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Start with a mask that selects only elements of the second shuffle
  // operand (ShuffleSrc2 below: undef for signed, zeros for unsigned).
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  // Stride between result-element positions in the widened vector; then
  // place the real input elements at the endian-appropriate end of each
  // intermediate-width lane (low end on LE, high end on BE).
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // For unsigned conversions shuffle in zeros so the upper bits of each
  // lane are already zero-extended; for signed, undef is fine since we
  // sign-extend-in-reg below.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    EVT ExtVT = Op.getOperand(0).getValueType();
    // With P9 Altivec we can sign-extend from the original narrow element
    // width directly (vextsb2w/vextsb2d etc. patterns).
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(ExtVT));
  } else
    // Unsigned: the zero lanes shuffled in above already form the
    // zero-extended value; just reinterpret.
    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
8450 
/// Custom-lower [SU]INT_TO_FP. Dispatches to the vector path, the QPX v4i1
/// path, or the direct-move path where available; otherwise goes through a
/// stack slot (or a reusable existing load) into an FPR and converts with
/// the FCFID family of nodes.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Vector conversions marked Custom go through the widen/shuffle lowering.
  EVT InVT = Op.getOperand(0).getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
    return Op;

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
    // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

    SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    // Round down to v4f32 if that is the requested result type.
    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value,
                          DAG.getIntPtrConstant(1, dl));
    return Value;
  }

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // i1 converts exactly to 0.0 or 1.0; emit a select instead of a real
  // conversion.
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    // Best case: the i64 value already comes from memory; reload it
    // directly into an FPR, skipping any GPR round trip.
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      // The value is a sign-extending i32 load: use lfiwax to load and
      // sign-extend straight into the FPR.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      // Zero-extending i32 load: use lfiwzx instead.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // The i64 is an extension of an i32 value that is not in memory:
      // spill the i32 to a 4-byte stack slot and let lfiwax/lfiwzx do the
      // extension as part of the load.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      // Fall back to a direct GPR->FPR bitcast of the i64 (selected as a
      // move via memory or direct move depending on the subtarget).
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    // Without FCFIDS the conversion produced f64; round to f32 explicitly.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    // With lfiwax/lfiwzx the i32 can be loaded (and extended) directly into
    // an FPR; reuse an existing in-memory copy when possible, otherwise
    // spill it to a fresh 4-byte slot.
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}
8709 
/// Lower FLT_ROUNDS_ by reading the FPSCR with mffs, spilling it to the
/// stack, reloading the word that holds the rounding-mode bits, and
/// remapping the 2-bit PPC encoding to the C FLT_ROUNDS encoding.
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  // NOTE(review): the fixed +4 offset picks out the word holding the
  // low-order 32 bits of the f64 only under big-endian layout; verify this
  // path (or its callers' legality settings) for little-endian subtargets.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
  Chain = CWD.getValue(1);

  // Transform as necessary: implements ((FPSCR & 3) ^ ((~FPSCR & 3) >> 1))
  // from the table above.
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Adjust the i32 result to the requested return type.
  RetVal =
      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
                  dl, VT, RetVal);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}
8773 
8774 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8775   EVT VT = Op.getValueType();
8776   unsigned BitWidth = VT.getSizeInBits();
8777   SDLoc dl(Op);
8778   assert(Op.getNumOperands() == 3 &&
8779          VT == Op.getOperand(1).getValueType() &&
8780          "Unexpected SHL!");
8781 
8782   // Expand into a bunch of logical ops.  Note that these ops
8783   // depend on the PPC behavior for oversized shift amounts.
8784   SDValue Lo = Op.getOperand(0);
8785   SDValue Hi = Op.getOperand(1);
8786   SDValue Amt = Op.getOperand(2);
8787   EVT AmtVT = Amt.getValueType();
8788 
8789   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8790                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8791   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8792   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8793   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8794   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8795                              DAG.getConstant(-BitWidth, dl, AmtVT));
8796   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8797   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8798   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8799   SDValue OutOps[] = { OutLo, OutHi };
8800   return DAG.getMergeValues(OutOps, dl);
8801 }
8802 
8803 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8804   EVT VT = Op.getValueType();
8805   SDLoc dl(Op);
8806   unsigned BitWidth = VT.getSizeInBits();
8807   assert(Op.getNumOperands() == 3 &&
8808          VT == Op.getOperand(1).getValueType() &&
8809          "Unexpected SRL!");
8810 
8811   // Expand into a bunch of logical ops.  Note that these ops
8812   // depend on the PPC behavior for oversized shift amounts.
8813   SDValue Lo = Op.getOperand(0);
8814   SDValue Hi = Op.getOperand(1);
8815   SDValue Amt = Op.getOperand(2);
8816   EVT AmtVT = Amt.getValueType();
8817 
8818   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8819                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8820   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8821   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8822   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8823   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8824                              DAG.getConstant(-BitWidth, dl, AmtVT));
8825   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8826   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8827   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8828   SDValue OutOps[] = { OutLo, OutHi };
8829   return DAG.getMergeValues(OutOps, dl);
8830 }
8831 
8832 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8833   SDLoc dl(Op);
8834   EVT VT = Op.getValueType();
8835   unsigned BitWidth = VT.getSizeInBits();
8836   assert(Op.getNumOperands() == 3 &&
8837          VT == Op.getOperand(1).getValueType() &&
8838          "Unexpected SRA!");
8839 
8840   // Expand into a bunch of logical ops, followed by a select_cc.
8841   SDValue Lo = Op.getOperand(0);
8842   SDValue Hi = Op.getOperand(1);
8843   SDValue Amt = Op.getOperand(2);
8844   EVT AmtVT = Amt.getValueType();
8845 
8846   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8847                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8848   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8849   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8850   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8851   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8852                              DAG.getConstant(-BitWidth, dl, AmtVT));
8853   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8854   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8855   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8856                                   Tmp4, Tmp6, ISD::SETLE);
8857   SDValue OutOps[] = { OutLo, OutHi };
8858   return DAG.getMergeValues(OutOps, dl);
8859 }
8860 
8861 //===----------------------------------------------------------------------===//
8862 // Vector related lowering.
8863 //
8864 
8865 /// BuildSplatI - Build a canonical splati of Val with an element size of
8866 /// SplatSize.  Cast the result to VT.
8867 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
8868                            SelectionDAG &DAG, const SDLoc &dl) {
8869   static const MVT VTys[] = { // canonical VT to use for each size.
8870     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8871   };
8872 
8873   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8874 
8875   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
8876   if (Val == -1)
8877     SplatSize = 1;
8878 
8879   EVT CanonicalVT = VTys[SplatSize-1];
8880 
8881   // Build a canonical splat for this value.
8882   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8883 }
8884 
8885 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8886 /// specified intrinsic ID.
8887 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8888                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
8889   if (DestVT == MVT::Other) DestVT = Op.getValueType();
8890   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8891                      DAG.getConstant(IID, dl, MVT::i32), Op);
8892 }
8893 
8894 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8895 /// specified intrinsic ID.
8896 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8897                                 SelectionDAG &DAG, const SDLoc &dl,
8898                                 EVT DestVT = MVT::Other) {
8899   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8900   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8901                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8902 }
8903 
8904 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8905 /// specified intrinsic ID.
8906 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8907                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8908                                 EVT DestVT = MVT::Other) {
8909   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8910   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8911                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8912 }
8913 
8914 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8915 /// amount.  The result has the specified value type.
8916 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8917                            SelectionDAG &DAG, const SDLoc &dl) {
8918   // Force LHS/RHS to be the right type.
8919   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8920   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8921 
8922   int Ops[16];
8923   for (unsigned i = 0; i != 16; ++i)
8924     Ops[i] = i + Amt;
8925   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8926   return DAG.getNode(ISD::BITCAST, dl, VT, T);
8927 }
8928 
8929 /// Do we have an efficient pattern in a .td file for this node?
8930 ///
8931 /// \param V - pointer to the BuildVectorSDNode being matched
8932 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8933 ///
8934 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8935 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8936 /// the opposite is true (expansion is beneficial) are:
8937 /// - The node builds a vector out of integers that are not 32 or 64-bits
8938 /// - The node builds a vector out of constants
8939 /// - The node is a "load-and-splat"
8940 /// In all other cases, we will choose to keep the BUILD_VECTOR.
8941 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8942                                             bool HasDirectMove,
8943                                             bool HasP8Vector) {
8944   EVT VecVT = V->getValueType(0);
8945   bool RightType = VecVT == MVT::v2f64 ||
8946     (HasP8Vector && VecVT == MVT::v4f32) ||
8947     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8948   if (!RightType)
8949     return false;
8950 
8951   bool IsSplat = true;
8952   bool IsLoad = false;
8953   SDValue Op0 = V->getOperand(0);
8954 
8955   // This function is called in a block that confirms the node is not a constant
8956   // splat. So a constant BUILD_VECTOR here means the vector is built out of
8957   // different constants.
8958   if (V->isConstant())
8959     return false;
8960   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8961     if (V->getOperand(i).isUndef())
8962       return false;
8963     // We want to expand nodes that represent load-and-splat even if the
8964     // loaded value is a floating point truncation or conversion to int.
8965     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8966         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8967          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8968         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8969          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8970         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8971          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8972       IsLoad = true;
8973     // If the operands are different or the input is not a load and has more
8974     // uses than just this BV node, then it isn't a splat.
8975     if (V->getOperand(i) != Op0 ||
8976         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8977       IsSplat = false;
8978   }
8979   return !(IsSplat && IsLoad);
8980 }
8981 
8982 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8983 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8984 
8985   SDLoc dl(Op);
8986   SDValue Op0 = Op->getOperand(0);
8987 
8988   if (!EnableQuadPrecision ||
8989       (Op.getValueType() != MVT::f128 ) ||
8990       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8991       (Op0.getOperand(0).getValueType() !=  MVT::i64) ||
8992       (Op0.getOperand(1).getValueType() != MVT::i64))
8993     return SDValue();
8994 
8995   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8996                      Op0.getOperand(1));
8997 }
8998 
8999 static const SDValue *getNormalLoadInput(const SDValue &Op) {
9000   const SDValue *InputLoad = &Op;
9001   if (InputLoad->getOpcode() == ISD::BITCAST)
9002     InputLoad = &InputLoad->getOperand(0);
9003   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR)
9004     InputLoad = &InputLoad->getOperand(0);
9005   if (InputLoad->getOpcode() != ISD::LOAD)
9006     return nullptr;
9007   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9008   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9009 }
9010 
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
    // We first build an i32 vector, load it into a QPX register,
    // then convert it to a floating-point vector and compare it
    // to a zero vector to get the boolean result.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = MFI.CreateStackObject(16, 16, false);
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    assert(BVN->getNumOperands() == 4 &&
      "BUILD_VECTOR for v4i1 does not have 4 operands");

    // Determine whether every defined element is a constant; if so we can
    // materialize the vector from the constant pool instead of going
    // through the stack slot.
    bool IsConst = true;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
        IsConst = false;
        break;
      }
    }

    if (IsConst) {
      // Encode each lane as a float: -1.0 for a zero element, +1.0 for any
      // other constant, undef for undef; QVLFSb then loads this v4f32
      // constant to produce the v4i1 value.
      Constant *One =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
      Constant *NegOne =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);

      Constant *CV[4];
      for (unsigned i = 0; i < 4; ++i) {
        if (BVN->getOperand(i).isUndef())
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
        else if (isNullConstant(BVN->getOperand(i)))
          CV[i] = NegOne;
        else
          CV[i] = One;
      }

      Constant *CP = ConstantVector::get(CV);
      SDValue CPIdx =
          DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), Align(16));

      SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
      SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
      return DAG.getMemIntrinsicNode(
          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }

    // Non-constant case: store each defined element as an i32 into the
    // 16-byte stack slot (undef lanes are simply not stored).
    SmallVector<SDValue, 4> Stores;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;

      unsigned Offset = 4*i;
      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
      if (StoreSize > 4) {
        // Elements wider than i32 are truncated on store.
        Stores.push_back(
            DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
                              PtrInfo.getWithOffset(Offset), MVT::i32));
      } else {
        SDValue StoreValue = BVN->getOperand(i);
        // Narrower elements are any-extended to fill the i32 slot.
        if (StoreSize < 4)
          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);

        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
                                      PtrInfo.getWithOffset(Offset)));
      }
    }

    SDValue StoreChain;
    if (!Stores.empty())
      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    else
      StoreChain = DAG.getEntryNode();

    // Now load from v4i32 into the QPX register; this will extend it to
    // v4i64 but not yet convert it to a floating point. Nevertheless, this
    // is typed as v4f64 because the QPX register integer states are not
    // explicitly represented.

    SDValue Ops[] = {StoreChain,
                     DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
                     FIdx};
    SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});

    SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
      dl, VTs, Ops, MVT::v4i32, PtrInfo);
    // Convert the loaded integers to floating point (qvfcfidu), then compare
    // against an all-zeros vector to produce the v4i1 result.
    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
      LoadedVect);

    SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);

    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
  }

  // All other QPX vectors are handled by generic code.
  if (Subtarget.hasQPX())
    return SDValue();

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32) {

    // Not a small constant splat. Before giving up, look for a splat of a
    // value loaded from memory.
    const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
    // Handle load-and-splat patterns as we have instructions that will do this
    // in one go.
    if (InputLoad && DAG.isSplatValue(Op, true)) {
      LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

      // We have handling for 4 and 8 byte elements.
      unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();

      // Checking for a single use of this load, we have to check for vector
      // width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value).
      if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
          ((Subtarget.hasVSX() && ElementSize == 64) ||
           (Subtarget.hasP9Vector() && ElementSize == 32))) {
        SDValue Ops[] = {
          LD->getChain(),    // Chain
          LD->getBasePtr(),  // Ptr
          DAG.getValueType(Op.getValueType()) // VT
        };
        return
          DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
                                  DAG.getVTList(Op.getValueType(), MVT::Other),
                                  Ops, LD->getMemoryVT(), LD->getMemOperand());
      }
    }

    // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
    // lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() &&
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  // From here on we have a constant splat of at most 32 bits.
  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;   // Splat element size in bytes.

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIB for constant splats one byte wide
  // FIXME: SplatBits is an unsigned int being cast to an int while passing it
  // as an argument to BuildSplatiI. Given SplatSize == 1 it is okay here.
  if (Subtarget.hasP9Vector() && SplatSize == 1)
    return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                    (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    // NOTE(review): this condition is identical to the srl case above, so
    // this branch is unreachable (the srl form always matches first).
    // Presumably an arithmetic shift of the sign-extended value was
    // intended -- confirm against upstream before changing.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  // No efficient lowering found; let the default expansion handle it.
  return SDValue();
}
9321 
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle. PFEntry packs an operation
/// number in bits [26,29] and two 13-bit operand ids in bits [13,25] and
/// [0,12]; each operand id either names another table entry to recurse into
/// or, for OP_COPY, directly encodes LHS or RHS.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // (1*9+2)*9+3 is the base-9 encoding of mask <0,1,2,3>, i.e. LHS;
    // ((4*9+5)*9+6)*9+7 encodes <4,5,6,7>, i.e. RHS.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize both operand shuffles before combining them.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Build the v16i8 byte-shuffle mask equivalent to the selected word-level
  // operation; vsldoi cases are emitted directly instead.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  // Emit the operation as a byte shuffle on v16i8 and cast the result back
  // to the original value type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
9398 
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2,  3,  4,  5,  6,  7,  8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  // The only source byte that can be inserted without shifting: 8 on LE,
  // 7 on BE.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for the source
    // element (VINSERTBSrcElem) in the Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa.  Unless the 2nd operand
      // is undefined, in which case we assume we're always picking from the
      // 1st operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      // Byte position of the insert, counted from the appropriate end for
      // the endianness.
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source so the desired byte lands in the insertable slot,
    // then insert it.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
9499 
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Identity orders for the low [0,7] and high [8,15] half-word ranges,
  // packed one 4-bit nibble per element.
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  // NOTE(review): assumes mask elements are non-negative here; an undef
  // element (-1) would halve to 0 -- confirm the mask is fully defined at
  // this point.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // Mask that clears this element's nibble so the remaining elements can
    // be compared against the expected identity order.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
9611 
9612 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
9613 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
9614 /// return the code it can be lowered into.  Worst case, it can always be
9615 /// lowered into a vperm.
9616 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9617                                                SelectionDAG &DAG) const {
9618   SDLoc dl(Op);
9619   SDValue V1 = Op.getOperand(0);
9620   SDValue V2 = Op.getOperand(1);
9621   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9622   EVT VT = Op.getValueType();
9623   bool isLittleEndian = Subtarget.isLittleEndian();
9624 
9625   unsigned ShiftElts, InsertAtByte;
9626   bool Swap = false;
9627 
9628   // If this is a load-and-splat, we can do that with a single instruction
9629   // in some cases. However if the load has multiple uses, we don't want to
9630   // combine it because that will just produce multiple loads.
9631   const SDValue *InputLoad = getNormalLoadInput(V1);
9632   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9633       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9634       InputLoad->hasOneUse()) {
9635     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9636     int SplatIdx =
9637       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9638 
9639     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9640     // For 4-byte load-and-splat, we need Power9.
9641     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9642       uint64_t Offset = 0;
9643       if (IsFourByte)
9644         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9645       else
9646         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9647       SDValue BasePtr = LD->getBasePtr();
9648       if (Offset != 0)
9649         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9650                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
9651       SDValue Ops[] = {
9652         LD->getChain(),    // Chain
9653         BasePtr,           // BasePtr
9654         DAG.getValueType(Op.getValueType()) // VT
9655       };
9656       SDVTList VTL =
9657         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9658       SDValue LdSplt =
9659         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9660                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
9661       if (LdSplt.getValueType() != SVOp->getValueType(0))
9662         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9663       return LdSplt;
9664     }
9665   }
9666   if (Subtarget.hasP9Vector() &&
9667       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9668                            isLittleEndian)) {
9669     if (Swap)
9670       std::swap(V1, V2);
9671     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9672     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9673     if (ShiftElts) {
9674       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9675                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
9676       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9677                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9678       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9679     }
9680     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9681                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9682     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9683   }
9684 
9685   if (Subtarget.hasP9Altivec()) {
9686     SDValue NewISDNode;
9687     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9688       return NewISDNode;
9689 
9690     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9691       return NewISDNode;
9692   }
9693 
9694   if (Subtarget.hasVSX() &&
9695       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9696     if (Swap)
9697       std::swap(V1, V2);
9698     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9699     SDValue Conv2 =
9700         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9701 
9702     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9703                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9704     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9705   }
9706 
9707   if (Subtarget.hasVSX() &&
9708     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9709     if (Swap)
9710       std::swap(V1, V2);
9711     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9712     SDValue Conv2 =
9713         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9714 
9715     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9716                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9717     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9718   }
9719 
9720   if (Subtarget.hasP9Vector()) {
9721      if (PPC::isXXBRHShuffleMask(SVOp)) {
9722       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9723       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9724       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9725     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9726       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9727       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9728       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9729     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9730       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9731       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9732       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9733     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9734       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9735       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9736       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9737     }
9738   }
9739 
9740   if (Subtarget.hasVSX()) {
9741     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9742       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9743 
9744       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9745       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9746                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
9747       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9748     }
9749 
9750     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9751     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9752       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9753       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9754       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9755     }
9756   }
9757 
9758   if (Subtarget.hasQPX()) {
9759     if (VT.getVectorNumElements() != 4)
9760       return SDValue();
9761 
9762     if (V2.isUndef()) V2 = V1;
9763 
9764     int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
9765     if (AlignIdx != -1) {
9766       return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
9767                          DAG.getConstant(AlignIdx, dl, MVT::i32));
9768     } else if (SVOp->isSplat()) {
9769       int SplatIdx = SVOp->getSplatIndex();
9770       if (SplatIdx >= 4) {
9771         std::swap(V1, V2);
9772         SplatIdx -= 4;
9773       }
9774 
9775       return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
9776                          DAG.getConstant(SplatIdx, dl, MVT::i32));
9777     }
9778 
9779     // Lower this into a qvgpci/qvfperm pair.
9780 
9781     // Compute the qvgpci literal
9782     unsigned idx = 0;
9783     for (unsigned i = 0; i < 4; ++i) {
9784       int m = SVOp->getMaskElt(i);
9785       unsigned mm = m >= 0 ? (unsigned) m : i;
9786       idx |= mm << (3-i)*3;
9787     }
9788 
9789     SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
9790                              DAG.getConstant(idx, dl, MVT::i32));
9791     return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
9792   }
9793 
9794   // Cases that are handled by instructions that take permute immediates
9795   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9796   // selected by the instruction selector.
9797   if (V2.isUndef()) {
9798     if (PPC::isSplatShuffleMask(SVOp, 1) ||
9799         PPC::isSplatShuffleMask(SVOp, 2) ||
9800         PPC::isSplatShuffleMask(SVOp, 4) ||
9801         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9802         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9803         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9804         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9805         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9806         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9807         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9808         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9809         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9810         (Subtarget.hasP8Altivec() && (
9811          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9812          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9813          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9814       return Op;
9815     }
9816   }
9817 
9818   // Altivec has a variety of "shuffle immediates" that take two vector inputs
9819   // and produce a fixed permutation.  If any of these match, do not lower to
9820   // VPERM.
9821   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
9822   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9823       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9824       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
9825       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9826       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9827       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9828       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9829       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9830       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9831       (Subtarget.hasP8Altivec() && (
9832        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9833        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
9834        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
9835     return Op;
9836 
9837   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
9838   // perfect shuffle table to emit an optimal matching sequence.
9839   ArrayRef<int> PermMask = SVOp->getMask();
9840 
9841   unsigned PFIndexes[4];
9842   bool isFourElementShuffle = true;
9843   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
9844     unsigned EltNo = 8;   // Start out undef.
9845     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
9846       if (PermMask[i*4+j] < 0)
9847         continue;   // Undef, ignore it.
9848 
9849       unsigned ByteSource = PermMask[i*4+j];
9850       if ((ByteSource & 3) != j) {
9851         isFourElementShuffle = false;
9852         break;
9853       }
9854 
9855       if (EltNo == 8) {
9856         EltNo = ByteSource/4;
9857       } else if (EltNo != ByteSource/4) {
9858         isFourElementShuffle = false;
9859         break;
9860       }
9861     }
9862     PFIndexes[i] = EltNo;
9863   }
9864 
9865   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
9866   // perfect shuffle vector to determine if it is cost effective to do this as
9867   // discrete instructions, or whether we should use a vperm.
9868   // For now, we skip this for little endian until such time as we have a
9869   // little-endian perfect shuffle table.
9870   if (isFourElementShuffle && !isLittleEndian) {
9871     // Compute the index in the perfect shuffle table.
9872     unsigned PFTableIndex =
9873       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9874 
9875     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9876     unsigned Cost  = (PFEntry >> 30);
9877 
9878     // Determining when to avoid vperm is tricky.  Many things affect the cost
9879     // of vperm, particularly how many times the perm mask needs to be computed.
9880     // For example, if the perm mask can be hoisted out of a loop or is already
9881     // used (perhaps because there are multiple permutes with the same shuffle
9882     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
9883     // the loop requires an extra register.
9884     //
9885     // As a compromise, we only emit discrete instructions if the shuffle can be
9886     // generated in 3 or fewer operations.  When we have loop information
9887     // available, if this block is within a loop, we should avoid using vperm
9888     // for 3-operation perms and use a constant pool load instead.
9889     if (Cost < 3)
9890       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9891   }
9892 
9893   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9894   // vector that will get spilled to the constant pool.
9895   if (V2.isUndef()) V2 = V1;
9896 
9897   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9898   // that it is in input element units, not in bytes.  Convert now.
9899 
9900   // For little endian, the order of the input vectors is reversed, and
9901   // the permutation mask is complemented with respect to 31.  This is
9902   // necessary to produce proper semantics with the big-endian-biased vperm
9903   // instruction.
9904   EVT EltVT = V1.getValueType().getVectorElementType();
9905   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9906 
9907   SmallVector<SDValue, 16> ResultMask;
9908   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9909     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9910 
9911     for (unsigned j = 0; j != BytesPerElement; ++j)
9912       if (isLittleEndian)
9913         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9914                                              dl, MVT::i32));
9915       else
9916         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9917                                              MVT::i32));
9918   }
9919 
9920   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9921   if (isLittleEndian)
9922     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9923                        V2, V1, VPermMask);
9924   else
9925     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9926                        V1, V2, VPermMask);
9927 }
9928 
/// getVectorCompareInfo - Given an intrinsic (an INTRINSIC_WO_CHAIN node
/// whose operand 0 is the intrinsic ID), return false if it is not a
/// vector comparison.  If it is, return true and fill in CompareOpc with
/// the numeric opcode value used to select the vcmp*/xvcmp* instruction
/// (presumably the instruction's extended-opcode field -- confirm against
/// the .td patterns) and isDot with whether this is the record ("dot",
/// CR6-setting) predicate form of the comparison.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  // Defaults: not a recognized vector comparison until a case says otherwise.
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword-element compares require P8 Altivec.
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // The vcmpne*/vcmpnez* family is only available with P9 Altivec.
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  // Signed doubleword greater-than requires P8 Altivec.
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  // Unsigned doubleword greater-than requires P8 Altivec.
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.  Same opcode values as above, but isDot stays false
  // since these produce a full vector result rather than setting CR6.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  }
  return true;
}
10175 
10176 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10177 /// lower, do it, otherwise return null.
10178 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10179                                                    SelectionDAG &DAG) const {
10180   unsigned IntrinsicID =
10181     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10182 
10183   SDLoc dl(Op);
10184 
10185   if (IntrinsicID == Intrinsic::thread_pointer) {
10186     // Reads the thread pointer register, used for __builtin_thread_pointer.
10187     if (Subtarget.isPPC64())
10188       return DAG.getRegister(PPC::X13, MVT::i64);
10189     return DAG.getRegister(PPC::R2, MVT::i32);
10190   }
10191 
10192   // If this is a lowered altivec predicate compare, CompareOpc is set to the
10193   // opcode number of the comparison.
10194   int CompareOpc;
10195   bool isDot;
10196   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10197     return SDValue();    // Don't custom lower most intrinsics.
10198 
10199   // If this is a non-dot comparison, make the VCMP node and we are done.
10200   if (!isDot) {
10201     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10202                               Op.getOperand(1), Op.getOperand(2),
10203                               DAG.getConstant(CompareOpc, dl, MVT::i32));
10204     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10205   }
10206 
10207   // Create the PPCISD altivec 'dot' comparison node.
10208   SDValue Ops[] = {
10209     Op.getOperand(2),  // LHS
10210     Op.getOperand(3),  // RHS
10211     DAG.getConstant(CompareOpc, dl, MVT::i32)
10212   };
10213   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10214   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
10215 
10216   // Now that we have the comparison, emit a copy from the CR to a GPR.
10217   // This is flagged to the above dot comparison.
10218   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10219                                 DAG.getRegister(PPC::CR6, MVT::i32),
10220                                 CompNode.getValue(1));
10221 
10222   // Unpack the result based on how the target uses it.
10223   unsigned BitNo;   // Bit # of CR6.
10224   bool InvertBit;   // Invert result?
10225   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10226   default:  // Can't happen, don't crash on invalid number though.
10227   case 0:   // Return the value of the EQ bit of CR6.
10228     BitNo = 0; InvertBit = false;
10229     break;
10230   case 1:   // Return the inverted value of the EQ bit of CR6.
10231     BitNo = 0; InvertBit = true;
10232     break;
10233   case 2:   // Return the value of the LT bit of CR6.
10234     BitNo = 2; InvertBit = false;
10235     break;
10236   case 3:   // Return the inverted value of the LT bit of CR6.
10237     BitNo = 2; InvertBit = true;
10238     break;
10239   }
10240 
10241   // Shift the bit into the low position.
10242   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10243                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10244   // Isolate the bit.
10245   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10246                       DAG.getConstant(1, dl, MVT::i32));
10247 
10248   // If we are supposed to, toggle the bit.
10249   if (InvertBit)
10250     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10251                         DAG.getConstant(1, dl, MVT::i32));
10252   return Flags;
10253 }
10254 
10255 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10256                                                SelectionDAG &DAG) const {
10257   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10258   // the beginning of the argument list.
10259   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10260   SDLoc DL(Op);
10261   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10262   case Intrinsic::ppc_cfence: {
10263     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10264     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10265     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10266                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10267                                                   Op.getOperand(ArgStart + 1)),
10268                                       Op.getOperand(0)),
10269                    0);
10270   }
10271   default:
10272     break;
10273   }
10274   return SDValue();
10275 }
10276 
10277 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
10278   // Check for a DIV with the same operands as this REM.
10279   for (auto UI : Op.getOperand(1)->uses()) {
10280     if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
10281         (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
10282       if (UI->getOperand(0) == Op.getOperand(0) &&
10283           UI->getOperand(1) == Op.getOperand(1))
10284         return SDValue();
10285   }
10286   return Op;
10287 }
10288 
10289 // Lower scalar BSWAP64 to xxbrd.
10290 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10291   SDLoc dl(Op);
10292   // MTVSRDD
10293   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10294                    Op.getOperand(0));
10295   // XXBRD
10296   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10297   // MFVSRD
10298   int VectorIndex = 0;
10299   if (Subtarget.isLittleEndian())
10300     VectorIndex = 1;
10301   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10302                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10303   return Op;
10304 }
10305 
10306 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10307 // compared to a value that is atomically loaded (atomic loads zero-extend).
10308 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10309                                                 SelectionDAG &DAG) const {
10310   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10311          "Expecting an atomic compare-and-swap here.");
10312   SDLoc dl(Op);
10313   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10314   EVT MemVT = AtomicNode->getMemoryVT();
10315   if (MemVT.getSizeInBits() >= 32)
10316     return Op;
10317 
10318   SDValue CmpOp = Op.getOperand(2);
10319   // If this is already correctly zero-extended, leave it alone.
10320   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10321   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10322     return Op;
10323 
10324   // Clear the high bits of the compare operand.
10325   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10326   SDValue NewCmpOp =
10327     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10328                 DAG.getConstant(MaskVal, dl, MVT::i32));
10329 
10330   // Replace the existing compare operand with the properly zero-extended one.
10331   SmallVector<SDValue, 4> Ops;
10332   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10333     Ops.push_back(AtomicNode->getOperand(i));
10334   Ops[2] = NewCmpOp;
10335   MachineMemOperand *MMO = AtomicNode->getMemOperand();
10336   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10337   auto NodeTy =
10338     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10339   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10340 }
10341 
10342 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10343                                                  SelectionDAG &DAG) const {
10344   SDLoc dl(Op);
10345   // Create a stack slot that is 16-byte aligned.
10346   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10347   int FrameIdx = MFI.CreateStackObject(16, 16, false);
10348   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10349   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10350 
10351   // Store the input value into Value#0 of the stack slot.
10352   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10353                                MachinePointerInfo());
10354   // Load it out.
10355   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10356 }
10357 
10358 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10359                                                   SelectionDAG &DAG) const {
10360   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10361          "Should only be called for ISD::INSERT_VECTOR_ELT");
10362 
10363   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10364   // We have legal lowering for constant indices but not for variable ones.
10365   if (!C)
10366     return SDValue();
10367 
10368   EVT VT = Op.getValueType();
10369   SDLoc dl(Op);
10370   SDValue V1 = Op.getOperand(0);
10371   SDValue V2 = Op.getOperand(1);
10372   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10373   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10374     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10375     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10376     unsigned InsertAtElement = C->getZExtValue();
10377     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10378     if (Subtarget.isLittleEndian()) {
10379       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10380     }
10381     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10382                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
10383   }
10384   return Op;
10385 }
10386 
10387 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
10388                                                    SelectionDAG &DAG) const {
10389   SDLoc dl(Op);
10390   SDNode *N = Op.getNode();
10391 
10392   assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
10393          "Unknown extract_vector_elt type");
10394 
10395   SDValue Value = N->getOperand(0);
10396 
10397   // The first part of this is like the store lowering except that we don't
10398   // need to track the chain.
10399 
10400   // The values are now known to be -1 (false) or 1 (true). To convert this
10401   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
10402   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
10403   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
10404 
10405   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
10406   // understand how to form the extending load.
10407   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
10408 
10409   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
10410 
10411   // Now convert to an integer and store.
10412   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
10413     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
10414     Value);
10415 
10416   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10417   int FrameIdx = MFI.CreateStackObject(16, 16, false);
10418   MachinePointerInfo PtrInfo =
10419       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
10420   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10421   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10422 
10423   SDValue StoreChain = DAG.getEntryNode();
10424   SDValue Ops[] = {StoreChain,
10425                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
10426                    Value, FIdx};
10427   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
10428 
10429   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
10430     dl, VTs, Ops, MVT::v4i32, PtrInfo);
10431 
10432   // Extract the value requested.
10433   unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
10434   SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
10435   Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
10436 
10437   SDValue IntVal =
10438       DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
10439 
10440   if (!Subtarget.useCRBits())
10441     return IntVal;
10442 
10443   return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
10444 }
10445 
/// Custom lowering for vector loads: under-aligned QPX v4f64/v4f32 loads are
/// scalarized, and QPX v4i1 loads are reconstructed from a 4-byte array.
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    // Otherwise, split the load into four scalar loads and rebuild the
    // vector with a BUILD_VECTOR.
    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      // Use an extending load when the in-memory scalar type is narrower
      // than the in-register scalar type.
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      // Only the first scalar load carries the pre-increment addressing of
      // the original load; its value(1) is the updated base pointer.
      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      // Advance to the next scalar element.
      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    // Indexed loads have an extra result: the updated base register.
    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    // Each boolean element lives in one byte; extend it to i32 so the
    // BUILD_VECTOR below has a legal element type to work with.
    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}
10533 
/// Custom lowering for vector stores: under-aligned QPX v4f64/v4f32 stores
/// are scalarized, and QPX v4i1 stores are written out as a 4-byte array.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    // Otherwise, split the store into four scalar element stores.
    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
                               DAG.getVectorIdxConstant(Idx, dl));
      SDValue Store;
      // Use a truncating store when the in-memory scalar type is narrower
      // than the in-register scalar type.
      if (ScalarVT != ScalarMemVT)
        Store =
            DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                              SN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
      else
        Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
                             SN->getPointerInfo().getWithOffset(Idx * Stride),
                             MinAlign(Alignment, Idx * Stride),
                             SN->getMemOperand()->getFlags(), SN->getAAInfo());

      // Only the first scalar store carries the pre-increment addressing of
      // the original store; its value(1) is the updated base pointer.
      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      // Advance to the next scalar element.
      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Indexed stores have an extra result: the updated base register.
    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  // Spill the converted vector to a 16-byte stack slot so the lanes can be
  // reloaded below as individual words.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array.
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                           PtrInfo.getWithOffset(Offset));
    LoadChains[i] = Loads[i].getValue(1);
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  // Truncate each reloaded word to one byte and store it to the destination.
  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
        SN->getAAInfo());
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}
10659 
/// Custom lowering for ISD::MUL on Altivec vector types that have no direct
/// full-width multiply instruction (v4i32 and v16i8).
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, dl, MVT::v4i32);

    // vmsumuhm with the halfword-rotated RHS sums the cross products that
    // form the upper 16-bit halves of each 32-bit product.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
10723 
10724 SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
10725 
10726   assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
10727 
10728   EVT VT = Op.getValueType();
10729   assert(VT.isVector() &&
10730          "Only set vector abs as custom, scalar abs shouldn't reach here!");
10731   assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
10732           VT == MVT::v16i8) &&
10733          "Unexpected vector element type!");
10734   assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
10735          "Current subtarget doesn't support smax v2i64!");
10736 
10737   // For vector abs, it can be lowered to:
10738   // abs x
10739   // ==>
10740   // y = -x
10741   // smax(x, y)
10742 
10743   SDLoc dl(Op);
10744   SDValue X = Op.getOperand(0);
10745   SDValue Zero = DAG.getConstant(0, dl, VT);
10746   SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
10747 
10748   // SMAX patch https://reviews.llvm.org/D47332
10749   // hasn't landed yet, so use intrinsic first here.
10750   // TODO: Should use SMAX directly once SMAX patch landed
10751   Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
10752   if (VT == MVT::v2i64)
10753     BifID = Intrinsic::ppc_altivec_vmaxsd;
10754   else if (VT == MVT::v8i16)
10755     BifID = Intrinsic::ppc_altivec_vmaxsh;
10756   else if (VT == MVT::v16i8)
10757     BifID = Intrinsic::ppc_altivec_vmaxsb;
10758 
10759   return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
10760 }
10761 
// Custom lowering for fpext from v2f32 to v2f64: emit PPCISD::FP_EXTEND_HALF
// on one doubleword of a v4f32, folding loads into PPCISD::LD_VSX_LH where
// the pattern allows it. Returns a null SDValue for anything else.
SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::FP_EXTEND &&
         "Should only be called for ISD::FP_EXTEND");

  // FIXME: handle extends from half precision float vectors on P9.
  // We only want to custom lower an extend from v2f32 to v2f64.
  if (Op.getValueType() != MVT::v2f64 ||
      Op.getOperand(0).getValueType() != MVT::v2f32)
    return SDValue();

  SDLoc dl(Op);
  SDValue Op0 = Op.getOperand(0);

  switch (Op0.getOpcode()) {
  default:
    return SDValue();
  case ISD::EXTRACT_SUBVECTOR: {
    assert(Op0.getNumOperands() == 2 &&
           isa<ConstantSDNode>(Op0->getOperand(1)) &&
           "Node should have 2 operands with second one being a constant!");

    if (Op0.getOperand(0).getValueType() != MVT::v4f32)
      return SDValue();

    // Custom lower is only done for high or low doubleword.
    int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
    if (Idx % 2 != 0)
      return SDValue();

    // Since input is v4f32, at this point Idx is either 0 or 2.
    // Shift to get the doubleword position we want.
    int DWord = Idx >> 1;

    // High and low word positions are different on little endian.
    if (Subtarget.isLittleEndian())
      DWord ^= 0x1;

    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
                       Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
  }
  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FSUB: {
    // If both operands of the arithmetic node are loads, replace them with
    // LD_VSX_LH loads, redo the arithmetic at v4f32, and extend doubleword 0.
    SDValue NewLoad[2];
    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both input are loads.
      SDValue LdOp = Op0.getOperand(i);
      if (LdOp.getOpcode() != ISD::LOAD)
        return SDValue();
      // Generate new load node.
      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
      SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
      NewLoad[i] = DAG.getMemIntrinsicNode(
          PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
          LD->getMemoryVT(), LD->getMemOperand());
    }
    SDValue NewOp =
        DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
                    NewLoad[1], Op0.getNode()->getFlags());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  case ISD::LOAD: {
    // A direct load becomes a LD_VSX_LH whose doubleword 0 is extended.
    LoadSDNode *LD = cast<LoadSDNode>(Op0);
    SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
    SDValue NewLd = DAG.getMemIntrinsicNode(
        PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
        LD->getMemoryVT(), LD->getMemOperand());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  }
  llvm_unreachable("ERROR:Should return for all cases within swtich.");
}
10838 
10839 /// LowerOperation - Provide custom lowering hooks for some operations.
10840 ///
10841 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10842   switch (Op.getOpcode()) {
10843   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10844   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
10845   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
10846   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
10847   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
10848   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
10849   case ISD::SETCC:              return LowerSETCC(Op, DAG);
10850   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
10851   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
10852 
10853   // Variable argument lowering.
10854   case ISD::VASTART:            return LowerVASTART(Op, DAG);
10855   case ISD::VAARG:              return LowerVAARG(Op, DAG);
10856   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
10857 
10858   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
10859   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10860   case ISD::GET_DYNAMIC_AREA_OFFSET:
10861     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10862 
10863   // Exception handling lowering.
10864   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
10865   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
10866   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
10867 
10868   case ISD::LOAD:               return LowerLOAD(Op, DAG);
10869   case ISD::STORE:              return LowerSTORE(Op, DAG);
10870   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
10871   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
10872   case ISD::FP_TO_UINT:
10873   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10874   case ISD::UINT_TO_FP:
10875   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
10876   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
10877 
10878   // Lower 64-bit shifts.
10879   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
10880   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
10881   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
10882 
10883   // Vector-related lowering.
10884   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
10885   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
10886   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10887   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
10888   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
10889   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
10890   case ISD::MUL:                return LowerMUL(Op, DAG);
10891   case ISD::ABS:                return LowerABS(Op, DAG);
10892   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
10893 
10894   // For counter-based loop handling.
10895   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
10896 
10897   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
10898 
10899   // Frame & Return address.
10900   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
10901   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
10902 
10903   case ISD::INTRINSIC_VOID:
10904     return LowerINTRINSIC_VOID(Op, DAG);
10905   case ISD::SREM:
10906   case ISD::UREM:
10907     return LowerREM(Op, DAG);
10908   case ISD::BSWAP:
10909     return LowerBSWAP(Op, DAG);
10910   case ISD::ATOMIC_CMP_SWAP:
10911     return LowerATOMIC_CMP_SWAP(Op, DAG);
10912   }
10913 }
10914 
/// ReplaceNodeResults - Custom type legalization: replace the results of \p N
/// with legally-typed equivalents, pushing them onto \p Results.
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    // Read the time base as two i32 halves and pair them into an i64.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Only the CTR loop-decrement intrinsic needs custom legalization here.
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    // Re-emit the intrinsic with the legal setcc result type, then truncate
    // the result back to i1.
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    // Only 32-bit SVR4 needs help with i64 va_arg.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      // NOTE(review): this wraps result number 1 of N rather than result 0 —
      // confirm that this is what LowerVAARG expects for the i64 case.
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  case ISD::TRUNCATE: {
    // Custom-lower vector narrowing only when the source fits in 128 bits
    // and its element size is a power of two.
    EVT TrgVT = N->getValueType(0);
    EVT OpVT = N->getOperand(0).getValueType();
    if (TrgVT.isVector() &&
        isOperationCustom(N->getOpcode(), TrgVT) &&
        OpVT.getSizeInBits() <= 128 &&
        isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
      Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
    return;
  }
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    // LowerFP_EXTEND returns a null SDValue for inputs it does not handle.
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
}
10989 
10990 //===----------------------------------------------------------------------===//
10991 //  Other Lowering Code
10992 //===----------------------------------------------------------------------===//
10993 
10994 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10995   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10996   Function *Func = Intrinsic::getDeclaration(M, Id);
10997   return Builder.CreateCall(Func, {});
10998 }
10999 
11000 // The mappings for emitLeading/TrailingFence is taken from
11001 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11002 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11003                                                  Instruction *Inst,
11004                                                  AtomicOrdering Ord) const {
11005   if (Ord == AtomicOrdering::SequentiallyConsistent)
11006     return callIntrinsic(Builder, Intrinsic::ppc_sync);
11007   if (isReleaseOrStronger(Ord))
11008     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11009   return nullptr;
11010 }
11011 
11012 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11013                                                   Instruction *Inst,
11014                                                   AtomicOrdering Ord) const {
11015   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11016     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11017     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11018     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11019     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11020       return Builder.CreateCall(
11021           Intrinsic::getDeclaration(
11022               Builder.GetInsertBlock()->getParent()->getParent(),
11023               Intrinsic::ppc_cfence, {Inst->getType()}),
11024           {Inst});
11025     // FIXME: Can use isync for rmw operation.
11026     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11027   }
11028   return nullptr;
11029 }
11030 
11031 MachineBasicBlock *
11032 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11033                                     unsigned AtomicSize,
11034                                     unsigned BinOpcode,
11035                                     unsigned CmpOpcode,
11036                                     unsigned CmpPred) const {
11037   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11038   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11039 
11040   auto LoadMnemonic = PPC::LDARX;
11041   auto StoreMnemonic = PPC::STDCX;
11042   switch (AtomicSize) {
11043   default:
11044     llvm_unreachable("Unexpected size of atomic entity");
11045   case 1:
11046     LoadMnemonic = PPC::LBARX;
11047     StoreMnemonic = PPC::STBCX;
11048     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11049     break;
11050   case 2:
11051     LoadMnemonic = PPC::LHARX;
11052     StoreMnemonic = PPC::STHCX;
11053     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11054     break;
11055   case 4:
11056     LoadMnemonic = PPC::LWARX;
11057     StoreMnemonic = PPC::STWCX;
11058     break;
11059   case 8:
11060     LoadMnemonic = PPC::LDARX;
11061     StoreMnemonic = PPC::STDCX;
11062     break;
11063   }
11064 
11065   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11066   MachineFunction *F = BB->getParent();
11067   MachineFunction::iterator It = ++BB->getIterator();
11068 
11069   Register dest = MI.getOperand(0).getReg();
11070   Register ptrA = MI.getOperand(1).getReg();
11071   Register ptrB = MI.getOperand(2).getReg();
11072   Register incr = MI.getOperand(3).getReg();
11073   DebugLoc dl = MI.getDebugLoc();
11074 
11075   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11076   MachineBasicBlock *loop2MBB =
11077     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11078   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11079   F->insert(It, loopMBB);
11080   if (CmpOpcode)
11081     F->insert(It, loop2MBB);
11082   F->insert(It, exitMBB);
11083   exitMBB->splice(exitMBB->begin(), BB,
11084                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11085   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11086 
11087   MachineRegisterInfo &RegInfo = F->getRegInfo();
11088   Register TmpReg = (!BinOpcode) ? incr :
11089     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11090                                            : &PPC::GPRCRegClass);
11091 
11092   //  thisMBB:
11093   //   ...
11094   //   fallthrough --> loopMBB
11095   BB->addSuccessor(loopMBB);
11096 
11097   //  loopMBB:
11098   //   l[wd]arx dest, ptr
11099   //   add r0, dest, incr
11100   //   st[wd]cx. r0, ptr
11101   //   bne- loopMBB
11102   //   fallthrough --> exitMBB
11103 
11104   // For max/min...
11105   //  loopMBB:
11106   //   l[wd]arx dest, ptr
11107   //   cmpl?[wd] incr, dest
11108   //   bgt exitMBB
11109   //  loop2MBB:
11110   //   st[wd]cx. dest, ptr
11111   //   bne- loopMBB
11112   //   fallthrough --> exitMBB
11113 
11114   BB = loopMBB;
11115   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11116     .addReg(ptrA).addReg(ptrB);
11117   if (BinOpcode)
11118     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11119   if (CmpOpcode) {
11120     // Signed comparisons of byte or halfword values must be sign-extended.
11121     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11122       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11123       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11124               ExtReg).addReg(dest);
11125       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11126         .addReg(incr).addReg(ExtReg);
11127     } else
11128       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11129         .addReg(incr).addReg(dest);
11130 
11131     BuildMI(BB, dl, TII->get(PPC::BCC))
11132       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11133     BB->addSuccessor(loop2MBB);
11134     BB->addSuccessor(exitMBB);
11135     BB = loop2MBB;
11136   }
11137   BuildMI(BB, dl, TII->get(StoreMnemonic))
11138     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11139   BuildMI(BB, dl, TII->get(PPC::BCC))
11140     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11141   BB->addSuccessor(loopMBB);
11142   BB->addSuccessor(exitMBB);
11143 
11144   //  exitMBB:
11145   //   ...
11146   BB = exitMBB;
11147   return BB;
11148 }
11149 
11150 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11151     MachineInstr &MI, MachineBasicBlock *BB,
11152     bool is8bit, // operation
11153     unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11154   // If we support part-word atomic mnemonics, just use them
11155   if (Subtarget.hasPartwordAtomics())
11156     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11157                             CmpPred);
11158 
11159   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11160   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11161   // In 64 bit mode we have to use 64 bits for addresses, even though the
11162   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
11163   // registers without caring whether they're 32 or 64, but here we're
11164   // doing actual arithmetic on the addresses.
11165   bool is64bit = Subtarget.isPPC64();
11166   bool isLittleEndian = Subtarget.isLittleEndian();
11167   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11168 
11169   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11170   MachineFunction *F = BB->getParent();
11171   MachineFunction::iterator It = ++BB->getIterator();
11172 
11173   Register dest = MI.getOperand(0).getReg();
11174   Register ptrA = MI.getOperand(1).getReg();
11175   Register ptrB = MI.getOperand(2).getReg();
11176   Register incr = MI.getOperand(3).getReg();
11177   DebugLoc dl = MI.getDebugLoc();
11178 
11179   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11180   MachineBasicBlock *loop2MBB =
11181       CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11182   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11183   F->insert(It, loopMBB);
11184   if (CmpOpcode)
11185     F->insert(It, loop2MBB);
11186   F->insert(It, exitMBB);
11187   exitMBB->splice(exitMBB->begin(), BB,
11188                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11189   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11190 
11191   MachineRegisterInfo &RegInfo = F->getRegInfo();
11192   const TargetRegisterClass *RC =
11193       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11194   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11195 
11196   Register PtrReg = RegInfo.createVirtualRegister(RC);
11197   Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11198   Register ShiftReg =
11199       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11200   Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11201   Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11202   Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11203   Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11204   Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11205   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11206   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11207   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11208   Register Ptr1Reg;
11209   Register TmpReg =
11210       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11211 
11212   //  thisMBB:
11213   //   ...
11214   //   fallthrough --> loopMBB
11215   BB->addSuccessor(loopMBB);
11216 
11217   // The 4-byte load must be aligned, while a char or short may be
11218   // anywhere in the word.  Hence all this nasty bookkeeping code.
11219   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
11220   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11221   //   xori shift, shift1, 24 [16]
11222   //   rlwinm ptr, ptr1, 0, 0, 29
11223   //   slw incr2, incr, shift
11224   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11225   //   slw mask, mask2, shift
11226   //  loopMBB:
11227   //   lwarx tmpDest, ptr
11228   //   add tmp, tmpDest, incr2
11229   //   andc tmp2, tmpDest, mask
11230   //   and tmp3, tmp, mask
11231   //   or tmp4, tmp3, tmp2
11232   //   stwcx. tmp4, ptr
11233   //   bne- loopMBB
11234   //   fallthrough --> exitMBB
11235   //   srw dest, tmpDest, shift
11236   if (ptrA != ZeroReg) {
11237     Ptr1Reg = RegInfo.createVirtualRegister(RC);
11238     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11239         .addReg(ptrA)
11240         .addReg(ptrB);
11241   } else {
11242     Ptr1Reg = ptrB;
11243   }
11244   // We need use 32-bit subregister to avoid mismatch register class in 64-bit
11245   // mode.
11246   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11247       .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11248       .addImm(3)
11249       .addImm(27)
11250       .addImm(is8bit ? 28 : 27);
11251   if (!isLittleEndian)
11252     BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11253         .addReg(Shift1Reg)
11254         .addImm(is8bit ? 24 : 16);
11255   if (is64bit)
11256     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11257         .addReg(Ptr1Reg)
11258         .addImm(0)
11259         .addImm(61);
11260   else
11261     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11262         .addReg(Ptr1Reg)
11263         .addImm(0)
11264         .addImm(0)
11265         .addImm(29);
11266   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11267   if (is8bit)
11268     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11269   else {
11270     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11271     BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11272         .addReg(Mask3Reg)
11273         .addImm(65535);
11274   }
11275   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11276       .addReg(Mask2Reg)
11277       .addReg(ShiftReg);
11278 
11279   BB = loopMBB;
11280   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11281       .addReg(ZeroReg)
11282       .addReg(PtrReg);
11283   if (BinOpcode)
11284     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11285         .addReg(Incr2Reg)
11286         .addReg(TmpDestReg);
11287   BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11288       .addReg(TmpDestReg)
11289       .addReg(MaskReg);
11290   BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11291   if (CmpOpcode) {
11292     // For unsigned comparisons, we can directly compare the shifted values.
11293     // For signed comparisons we shift and sign extend.
11294     Register SReg = RegInfo.createVirtualRegister(GPRC);
11295     BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11296         .addReg(TmpDestReg)
11297         .addReg(MaskReg);
11298     unsigned ValueReg = SReg;
11299     unsigned CmpReg = Incr2Reg;
11300     if (CmpOpcode == PPC::CMPW) {
11301       ValueReg = RegInfo.createVirtualRegister(GPRC);
11302       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11303           .addReg(SReg)
11304           .addReg(ShiftReg);
11305       Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11306       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11307           .addReg(ValueReg);
11308       ValueReg = ValueSReg;
11309       CmpReg = incr;
11310     }
11311     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11312         .addReg(CmpReg)
11313         .addReg(ValueReg);
11314     BuildMI(BB, dl, TII->get(PPC::BCC))
11315         .addImm(CmpPred)
11316         .addReg(PPC::CR0)
11317         .addMBB(exitMBB);
11318     BB->addSuccessor(loop2MBB);
11319     BB->addSuccessor(exitMBB);
11320     BB = loop2MBB;
11321   }
11322   BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11323   BuildMI(BB, dl, TII->get(PPC::STWCX))
11324       .addReg(Tmp4Reg)
11325       .addReg(ZeroReg)
11326       .addReg(PtrReg);
11327   BuildMI(BB, dl, TII->get(PPC::BCC))
11328       .addImm(PPC::PRED_NE)
11329       .addReg(PPC::CR0)
11330       .addMBB(loopMBB);
11331   BB->addSuccessor(loopMBB);
11332   BB->addSuccessor(exitMBB);
11333 
11334   //  exitMBB:
11335   //   ...
11336   BB = exitMBB;
11337   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11338       .addReg(TmpDestReg)
11339       .addReg(ShiftReg);
11340   return BB;
11341 }
11342 
// Lower the EH_SjLj_SetJmp32/64 pseudo-instruction into explicit control
// flow.  For v = setjmp(buf) we build three blocks: thisMBB saves context
// registers into buf, mainMBB stores the return address (LR) and yields 0,
// and sinkMBB merges the two result values with a PHI (0 on the direct
// path, 1 when control returns here via a longjmp).  Returns sinkMBB, the
// block that now ends the expansion.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  // Insertion point for the new machine basic blocks: right after MBB.
  MachineFunction::iterator I = ++MBB->getIterator();

  // Operand 0 is the i32 result of the setjmp; operand 1 is the buffer.
  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  // One virtual register per incoming path of the final PHI.
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  // Byte offsets of the pointer-sized jmp_buf slots described above.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Virtual register that will receive the return address (from LR) in
  // mainMBB below.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  // On 64-bit ELF, save the TOC pointer (X2) into the fourth slot so a
  // longjmp arriving from another module can restore it.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  // Save the base pointer into the fifth slot (offset 4 * slot size).
  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  // Setup
  // BCLalways: branch to mainMBB, setting LR.  The no-preserved regmask
  // records that any register may be clobbered before control returns here
  // via a longjmp.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(TRI->getNoPreservedMask());

  // Result value produced when control re-enters here through a longjmp.
  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  // Successor probabilities: the edge to sinkMBB carries all the weight;
  // the mainMBB edge is recorded with probability zero.
  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  // Write the captured return address into the second jmp_buf slot,
  // using a pointer-sized store.
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Result value produced on the normal (direct) path.
  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  // Merge the two possible result values into the original destination.
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
11484 
11485 MachineBasicBlock *
11486 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11487                                      MachineBasicBlock *MBB) const {
11488   DebugLoc DL = MI.getDebugLoc();
11489   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11490 
11491   MachineFunction *MF = MBB->getParent();
11492   MachineRegisterInfo &MRI = MF->getRegInfo();
11493 
11494   MVT PVT = getPointerTy(MF->getDataLayout());
11495   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11496          "Invalid Pointer Size!");
11497 
11498   const TargetRegisterClass *RC =
11499     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11500   Register Tmp = MRI.createVirtualRegister(RC);
11501   // Since FP is only updated here but NOT referenced, it's treated as GPR.
11502   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11503   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11504   unsigned BP =
11505       (PVT == MVT::i64)
11506           ? PPC::X30
11507           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11508                                                               : PPC::R30);
11509 
11510   MachineInstrBuilder MIB;
11511 
11512   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11513   const int64_t SPOffset    = 2 * PVT.getStoreSize();
11514   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11515   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11516 
11517   Register BufReg = MI.getOperand(0).getReg();
11518 
11519   // Reload FP (the jumped-to function may not have had a
11520   // frame pointer, and if so, then its r31 will be restored
11521   // as necessary).
11522   if (PVT == MVT::i64) {
11523     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11524             .addImm(0)
11525             .addReg(BufReg);
11526   } else {
11527     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11528             .addImm(0)
11529             .addReg(BufReg);
11530   }
11531   MIB.cloneMemRefs(MI);
11532 
11533   // Reload IP
11534   if (PVT == MVT::i64) {
11535     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11536             .addImm(LabelOffset)
11537             .addReg(BufReg);
11538   } else {
11539     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11540             .addImm(LabelOffset)
11541             .addReg(BufReg);
11542   }
11543   MIB.cloneMemRefs(MI);
11544 
11545   // Reload SP
11546   if (PVT == MVT::i64) {
11547     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11548             .addImm(SPOffset)
11549             .addReg(BufReg);
11550   } else {
11551     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11552             .addImm(SPOffset)
11553             .addReg(BufReg);
11554   }
11555   MIB.cloneMemRefs(MI);
11556 
11557   // Reload BP
11558   if (PVT == MVT::i64) {
11559     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11560             .addImm(BPOffset)
11561             .addReg(BufReg);
11562   } else {
11563     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11564             .addImm(BPOffset)
11565             .addReg(BufReg);
11566   }
11567   MIB.cloneMemRefs(MI);
11568 
11569   // Reload TOC
11570   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11571     setUsesTOCBasePtr(*MBB->getParent());
11572     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11573               .addImm(TOCOffset)
11574               .addReg(BufReg)
11575               .cloneMemRefs(MI);
11576   }
11577 
11578   // Jump
11579   BuildMI(*MBB, MI, DL,
11580           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11581   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11582 
11583   MI.eraseFromParent();
11584   return MBB;
11585 }
11586 
11587 MachineBasicBlock *
11588 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11589                                                MachineBasicBlock *BB) const {
11590   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11591       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11592     if (Subtarget.is64BitELFABI() &&
11593         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11594         !Subtarget.isUsingPCRelativeCalls()) {
11595       // Call lowering should have added an r2 operand to indicate a dependence
11596       // on the TOC base pointer value. It can't however, because there is no
11597       // way to mark the dependence as implicit there, and so the stackmap code
11598       // will confuse it with a regular operand. Instead, add the dependence
11599       // here.
11600       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11601     }
11602 
11603     return emitPatchPoint(MI, BB);
11604   }
11605 
11606   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11607       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11608     return emitEHSjLjSetJmp(MI, BB);
11609   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11610              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11611     return emitEHSjLjLongJmp(MI, BB);
11612   }
11613 
11614   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11615 
11616   // To "insert" these instructions we actually have to insert their
11617   // control-flow patterns.
11618   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11619   MachineFunction::iterator It = ++BB->getIterator();
11620 
11621   MachineFunction *F = BB->getParent();
11622 
11623   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11624       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11625       MI.getOpcode() == PPC::SELECT_I8) {
11626     SmallVector<MachineOperand, 2> Cond;
11627     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11628         MI.getOpcode() == PPC::SELECT_CC_I8)
11629       Cond.push_back(MI.getOperand(4));
11630     else
11631       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11632     Cond.push_back(MI.getOperand(1));
11633 
11634     DebugLoc dl = MI.getDebugLoc();
11635     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11636                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11637   } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11638              MI.getOpcode() == PPC::SELECT_CC_F8 ||
11639              MI.getOpcode() == PPC::SELECT_CC_F16 ||
11640              MI.getOpcode() == PPC::SELECT_CC_QFRC ||
11641              MI.getOpcode() == PPC::SELECT_CC_QSRC ||
11642              MI.getOpcode() == PPC::SELECT_CC_QBRC ||
11643              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11644              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11645              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11646              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11647              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11648              MI.getOpcode() == PPC::SELECT_CC_SPE ||
11649              MI.getOpcode() == PPC::SELECT_F4 ||
11650              MI.getOpcode() == PPC::SELECT_F8 ||
11651              MI.getOpcode() == PPC::SELECT_F16 ||
11652              MI.getOpcode() == PPC::SELECT_QFRC ||
11653              MI.getOpcode() == PPC::SELECT_QSRC ||
11654              MI.getOpcode() == PPC::SELECT_QBRC ||
11655              MI.getOpcode() == PPC::SELECT_SPE ||
11656              MI.getOpcode() == PPC::SELECT_SPE4 ||
11657              MI.getOpcode() == PPC::SELECT_VRRC ||
11658              MI.getOpcode() == PPC::SELECT_VSFRC ||
11659              MI.getOpcode() == PPC::SELECT_VSSRC ||
11660              MI.getOpcode() == PPC::SELECT_VSRC) {
11661     // The incoming instruction knows the destination vreg to set, the
11662     // condition code register to branch on, the true/false values to
11663     // select between, and a branch opcode to use.
11664 
11665     //  thisMBB:
11666     //  ...
11667     //   TrueVal = ...
11668     //   cmpTY ccX, r1, r2
11669     //   bCC copy1MBB
11670     //   fallthrough --> copy0MBB
11671     MachineBasicBlock *thisMBB = BB;
11672     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11673     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11674     DebugLoc dl = MI.getDebugLoc();
11675     F->insert(It, copy0MBB);
11676     F->insert(It, sinkMBB);
11677 
11678     // Transfer the remainder of BB and its successor edges to sinkMBB.
11679     sinkMBB->splice(sinkMBB->begin(), BB,
11680                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11681     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11682 
11683     // Next, add the true and fallthrough blocks as its successors.
11684     BB->addSuccessor(copy0MBB);
11685     BB->addSuccessor(sinkMBB);
11686 
11687     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11688         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11689         MI.getOpcode() == PPC::SELECT_F16 ||
11690         MI.getOpcode() == PPC::SELECT_SPE4 ||
11691         MI.getOpcode() == PPC::SELECT_SPE ||
11692         MI.getOpcode() == PPC::SELECT_QFRC ||
11693         MI.getOpcode() == PPC::SELECT_QSRC ||
11694         MI.getOpcode() == PPC::SELECT_QBRC ||
11695         MI.getOpcode() == PPC::SELECT_VRRC ||
11696         MI.getOpcode() == PPC::SELECT_VSFRC ||
11697         MI.getOpcode() == PPC::SELECT_VSSRC ||
11698         MI.getOpcode() == PPC::SELECT_VSRC) {
11699       BuildMI(BB, dl, TII->get(PPC::BC))
11700           .addReg(MI.getOperand(1).getReg())
11701           .addMBB(sinkMBB);
11702     } else {
11703       unsigned SelectPred = MI.getOperand(4).getImm();
11704       BuildMI(BB, dl, TII->get(PPC::BCC))
11705           .addImm(SelectPred)
11706           .addReg(MI.getOperand(1).getReg())
11707           .addMBB(sinkMBB);
11708     }
11709 
11710     //  copy0MBB:
11711     //   %FalseValue = ...
11712     //   # fallthrough to sinkMBB
11713     BB = copy0MBB;
11714 
11715     // Update machine-CFG edges
11716     BB->addSuccessor(sinkMBB);
11717 
11718     //  sinkMBB:
11719     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11720     //  ...
11721     BB = sinkMBB;
11722     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11723         .addReg(MI.getOperand(3).getReg())
11724         .addMBB(copy0MBB)
11725         .addReg(MI.getOperand(2).getReg())
11726         .addMBB(thisMBB);
11727   } else if (MI.getOpcode() == PPC::ReadTB) {
11728     // To read the 64-bit time-base register on a 32-bit target, we read the
11729     // two halves. Should the counter have wrapped while it was being read, we
11730     // need to try again.
11731     // ...
11732     // readLoop:
11733     // mfspr Rx,TBU # load from TBU
11734     // mfspr Ry,TB  # load from TB
11735     // mfspr Rz,TBU # load from TBU
11736     // cmpw crX,Rx,Rz # check if 'old'='new'
11737     // bne readLoop   # branch if they're not equal
11738     // ...
11739 
11740     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
11741     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11742     DebugLoc dl = MI.getDebugLoc();
11743     F->insert(It, readMBB);
11744     F->insert(It, sinkMBB);
11745 
11746     // Transfer the remainder of BB and its successor edges to sinkMBB.
11747     sinkMBB->splice(sinkMBB->begin(), BB,
11748                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11749     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11750 
11751     BB->addSuccessor(readMBB);
11752     BB = readMBB;
11753 
11754     MachineRegisterInfo &RegInfo = F->getRegInfo();
11755     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11756     Register LoReg = MI.getOperand(0).getReg();
11757     Register HiReg = MI.getOperand(1).getReg();
11758 
11759     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
11760     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
11761     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
11762 
11763     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11764 
11765     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
11766         .addReg(HiReg)
11767         .addReg(ReadAgainReg);
11768     BuildMI(BB, dl, TII->get(PPC::BCC))
11769         .addImm(PPC::PRED_NE)
11770         .addReg(CmpReg)
11771         .addMBB(readMBB);
11772 
11773     BB->addSuccessor(readMBB);
11774     BB->addSuccessor(sinkMBB);
11775   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
11776     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
11777   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
11778     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
11779   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
11780     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
11781   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
11782     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
11783 
11784   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
11785     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
11786   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
11787     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
11788   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
11789     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
11790   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
11791     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
11792 
11793   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
11794     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
11795   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
11796     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
11797   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
11798     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
11799   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
11800     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
11801 
11802   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
11803     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
11804   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
11805     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
11806   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
11807     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
11808   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
11809     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
11810 
11811   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
11812     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
11813   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
11814     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
11815   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
11816     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
11817   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
11818     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
11819 
11820   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
11821     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
11822   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
11823     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
11824   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
11825     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
11826   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
11827     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
11828 
11829   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
11830     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
11831   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
11832     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
11833   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
11834     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
11835   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
11836     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
11837 
11838   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
11839     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
11840   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
11841     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
11842   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
11843     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
11844   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
11845     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
11846 
11847   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
11848     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
11849   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
11850     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
11851   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
11852     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
11853   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
11854     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
11855 
11856   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
11857     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
11858   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
11859     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
11860   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
11861     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
11862   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
11863     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
11864 
11865   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
11866     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
11867   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
11868     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
11869   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
11870     BB = EmitAtomicBinary(MI, BB, 4, 0);
11871   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
11872     BB = EmitAtomicBinary(MI, BB, 8, 0);
11873   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
11874            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
11875            (Subtarget.hasPartwordAtomics() &&
11876             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
11877            (Subtarget.hasPartwordAtomics() &&
11878             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
11879     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
11880 
11881     auto LoadMnemonic = PPC::LDARX;
11882     auto StoreMnemonic = PPC::STDCX;
11883     switch (MI.getOpcode()) {
11884     default:
11885       llvm_unreachable("Compare and swap of unknown size");
11886     case PPC::ATOMIC_CMP_SWAP_I8:
11887       LoadMnemonic = PPC::LBARX;
11888       StoreMnemonic = PPC::STBCX;
11889       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11890       break;
11891     case PPC::ATOMIC_CMP_SWAP_I16:
11892       LoadMnemonic = PPC::LHARX;
11893       StoreMnemonic = PPC::STHCX;
11894       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11895       break;
11896     case PPC::ATOMIC_CMP_SWAP_I32:
11897       LoadMnemonic = PPC::LWARX;
11898       StoreMnemonic = PPC::STWCX;
11899       break;
11900     case PPC::ATOMIC_CMP_SWAP_I64:
11901       LoadMnemonic = PPC::LDARX;
11902       StoreMnemonic = PPC::STDCX;
11903       break;
11904     }
11905     Register dest = MI.getOperand(0).getReg();
11906     Register ptrA = MI.getOperand(1).getReg();
11907     Register ptrB = MI.getOperand(2).getReg();
11908     Register oldval = MI.getOperand(3).getReg();
11909     Register newval = MI.getOperand(4).getReg();
11910     DebugLoc dl = MI.getDebugLoc();
11911 
11912     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11913     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11914     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11915     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11916     F->insert(It, loop1MBB);
11917     F->insert(It, loop2MBB);
11918     F->insert(It, midMBB);
11919     F->insert(It, exitMBB);
11920     exitMBB->splice(exitMBB->begin(), BB,
11921                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11922     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11923 
11924     //  thisMBB:
11925     //   ...
11926     //   fallthrough --> loopMBB
11927     BB->addSuccessor(loop1MBB);
11928 
11929     // loop1MBB:
11930     //   l[bhwd]arx dest, ptr
11931     //   cmp[wd] dest, oldval
11932     //   bne- midMBB
11933     // loop2MBB:
11934     //   st[bhwd]cx. newval, ptr
11935     //   bne- loopMBB
11936     //   b exitBB
11937     // midMBB:
11938     //   st[bhwd]cx. dest, ptr
11939     // exitBB:
11940     BB = loop1MBB;
11941     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11942     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11943         .addReg(oldval)
11944         .addReg(dest);
11945     BuildMI(BB, dl, TII->get(PPC::BCC))
11946         .addImm(PPC::PRED_NE)
11947         .addReg(PPC::CR0)
11948         .addMBB(midMBB);
11949     BB->addSuccessor(loop2MBB);
11950     BB->addSuccessor(midMBB);
11951 
11952     BB = loop2MBB;
11953     BuildMI(BB, dl, TII->get(StoreMnemonic))
11954         .addReg(newval)
11955         .addReg(ptrA)
11956         .addReg(ptrB);
11957     BuildMI(BB, dl, TII->get(PPC::BCC))
11958         .addImm(PPC::PRED_NE)
11959         .addReg(PPC::CR0)
11960         .addMBB(loop1MBB);
11961     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11962     BB->addSuccessor(loop1MBB);
11963     BB->addSuccessor(exitMBB);
11964 
11965     BB = midMBB;
11966     BuildMI(BB, dl, TII->get(StoreMnemonic))
11967         .addReg(dest)
11968         .addReg(ptrA)
11969         .addReg(ptrB);
11970     BB->addSuccessor(exitMBB);
11971 
11972     //  exitMBB:
11973     //   ...
11974     BB = exitMBB;
11975   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11976              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11977     // We must use 64-bit registers for addresses when targeting 64-bit,
11978     // since we're actually doing arithmetic on them.  Other registers
11979     // can be 32-bit.
11980     bool is64bit = Subtarget.isPPC64();
11981     bool isLittleEndian = Subtarget.isLittleEndian();
11982     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11983 
11984     Register dest = MI.getOperand(0).getReg();
11985     Register ptrA = MI.getOperand(1).getReg();
11986     Register ptrB = MI.getOperand(2).getReg();
11987     Register oldval = MI.getOperand(3).getReg();
11988     Register newval = MI.getOperand(4).getReg();
11989     DebugLoc dl = MI.getDebugLoc();
11990 
11991     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11992     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11993     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11994     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11995     F->insert(It, loop1MBB);
11996     F->insert(It, loop2MBB);
11997     F->insert(It, midMBB);
11998     F->insert(It, exitMBB);
11999     exitMBB->splice(exitMBB->begin(), BB,
12000                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12001     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12002 
12003     MachineRegisterInfo &RegInfo = F->getRegInfo();
12004     const TargetRegisterClass *RC =
12005         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12006     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12007 
12008     Register PtrReg = RegInfo.createVirtualRegister(RC);
12009     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12010     Register ShiftReg =
12011         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12012     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12013     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12014     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12015     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12016     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12017     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12018     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12019     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12020     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12021     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12022     Register Ptr1Reg;
12023     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12024     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12025     //  thisMBB:
12026     //   ...
12027     //   fallthrough --> loopMBB
12028     BB->addSuccessor(loop1MBB);
12029 
12030     // The 4-byte load must be aligned, while a char or short may be
12031     // anywhere in the word.  Hence all this nasty bookkeeping code.
12032     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
12033     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12034     //   xori shift, shift1, 24 [16]
12035     //   rlwinm ptr, ptr1, 0, 0, 29
12036     //   slw newval2, newval, shift
12037     //   slw oldval2, oldval,shift
12038     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12039     //   slw mask, mask2, shift
12040     //   and newval3, newval2, mask
12041     //   and oldval3, oldval2, mask
12042     // loop1MBB:
12043     //   lwarx tmpDest, ptr
12044     //   and tmp, tmpDest, mask
12045     //   cmpw tmp, oldval3
12046     //   bne- midMBB
12047     // loop2MBB:
12048     //   andc tmp2, tmpDest, mask
12049     //   or tmp4, tmp2, newval3
12050     //   stwcx. tmp4, ptr
12051     //   bne- loop1MBB
12052     //   b exitBB
12053     // midMBB:
12054     //   stwcx. tmpDest, ptr
12055     // exitBB:
12056     //   srw dest, tmpDest, shift
12057     if (ptrA != ZeroReg) {
12058       Ptr1Reg = RegInfo.createVirtualRegister(RC);
12059       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12060           .addReg(ptrA)
12061           .addReg(ptrB);
12062     } else {
12063       Ptr1Reg = ptrB;
12064     }
12065 
12066     // We need use 32-bit subregister to avoid mismatch register class in 64-bit
12067     // mode.
12068     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12069         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12070         .addImm(3)
12071         .addImm(27)
12072         .addImm(is8bit ? 28 : 27);
12073     if (!isLittleEndian)
12074       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12075           .addReg(Shift1Reg)
12076           .addImm(is8bit ? 24 : 16);
12077     if (is64bit)
12078       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12079           .addReg(Ptr1Reg)
12080           .addImm(0)
12081           .addImm(61);
12082     else
12083       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12084           .addReg(Ptr1Reg)
12085           .addImm(0)
12086           .addImm(0)
12087           .addImm(29);
12088     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12089         .addReg(newval)
12090         .addReg(ShiftReg);
12091     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12092         .addReg(oldval)
12093         .addReg(ShiftReg);
12094     if (is8bit)
12095       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12096     else {
12097       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12098       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12099           .addReg(Mask3Reg)
12100           .addImm(65535);
12101     }
12102     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12103         .addReg(Mask2Reg)
12104         .addReg(ShiftReg);
12105     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12106         .addReg(NewVal2Reg)
12107         .addReg(MaskReg);
12108     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12109         .addReg(OldVal2Reg)
12110         .addReg(MaskReg);
12111 
12112     BB = loop1MBB;
12113     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12114         .addReg(ZeroReg)
12115         .addReg(PtrReg);
12116     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12117         .addReg(TmpDestReg)
12118         .addReg(MaskReg);
12119     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12120         .addReg(TmpReg)
12121         .addReg(OldVal3Reg);
12122     BuildMI(BB, dl, TII->get(PPC::BCC))
12123         .addImm(PPC::PRED_NE)
12124         .addReg(PPC::CR0)
12125         .addMBB(midMBB);
12126     BB->addSuccessor(loop2MBB);
12127     BB->addSuccessor(midMBB);
12128 
12129     BB = loop2MBB;
12130     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12131         .addReg(TmpDestReg)
12132         .addReg(MaskReg);
12133     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12134         .addReg(Tmp2Reg)
12135         .addReg(NewVal3Reg);
12136     BuildMI(BB, dl, TII->get(PPC::STWCX))
12137         .addReg(Tmp4Reg)
12138         .addReg(ZeroReg)
12139         .addReg(PtrReg);
12140     BuildMI(BB, dl, TII->get(PPC::BCC))
12141         .addImm(PPC::PRED_NE)
12142         .addReg(PPC::CR0)
12143         .addMBB(loop1MBB);
12144     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12145     BB->addSuccessor(loop1MBB);
12146     BB->addSuccessor(exitMBB);
12147 
12148     BB = midMBB;
12149     BuildMI(BB, dl, TII->get(PPC::STWCX))
12150         .addReg(TmpDestReg)
12151         .addReg(ZeroReg)
12152         .addReg(PtrReg);
12153     BB->addSuccessor(exitMBB);
12154 
12155     //  exitMBB:
12156     //   ...
12157     BB = exitMBB;
12158     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12159         .addReg(TmpReg)
12160         .addReg(ShiftReg);
12161   } else if (MI.getOpcode() == PPC::FADDrtz) {
12162     // This pseudo performs an FADD with rounding mode temporarily forced
12163     // to round-to-zero.  We emit this via custom inserter since the FPSCR
12164     // is not modeled at the SelectionDAG level.
12165     Register Dest = MI.getOperand(0).getReg();
12166     Register Src1 = MI.getOperand(1).getReg();
12167     Register Src2 = MI.getOperand(2).getReg();
12168     DebugLoc dl = MI.getDebugLoc();
12169 
12170     MachineRegisterInfo &RegInfo = F->getRegInfo();
12171     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12172 
12173     // Save FPSCR value.
12174     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12175 
12176     // Set rounding mode to round-to-zero.
12177     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
12178     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
12179 
12180     // Perform addition.
12181     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
12182 
12183     // Restore FPSCR value.
12184     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12185   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12186              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12187              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12188              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12189     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12190                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12191                           ? PPC::ANDI8_rec
12192                           : PPC::ANDI_rec;
12193     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12194                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12195 
12196     MachineRegisterInfo &RegInfo = F->getRegInfo();
12197     Register Dest = RegInfo.createVirtualRegister(
12198         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12199 
12200     DebugLoc Dl = MI.getDebugLoc();
12201     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12202         .addReg(MI.getOperand(1).getReg())
12203         .addImm(1);
12204     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12205             MI.getOperand(0).getReg())
12206         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12207   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12208     DebugLoc Dl = MI.getDebugLoc();
12209     MachineRegisterInfo &RegInfo = F->getRegInfo();
12210     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12211     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12212     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12213             MI.getOperand(0).getReg())
12214         .addReg(CRReg);
12215   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12216     DebugLoc Dl = MI.getDebugLoc();
12217     unsigned Imm = MI.getOperand(1).getImm();
12218     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12219     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12220             MI.getOperand(0).getReg())
12221         .addReg(PPC::CR0EQ);
12222   } else if (MI.getOpcode() == PPC::SETRNDi) {
12223     DebugLoc dl = MI.getDebugLoc();
12224     Register OldFPSCRReg = MI.getOperand(0).getReg();
12225 
12226     // Save FPSCR value.
12227     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12228 
12229     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12230     // the following settings:
12231     //   00 Round to nearest
12232     //   01 Round to 0
12233     //   10 Round to +inf
12234     //   11 Round to -inf
12235 
12236     // When the operand is immediate, using the two least significant bits of
12237     // the immediate to set the bits 62:63 of FPSCR.
12238     unsigned Mode = MI.getOperand(1).getImm();
12239     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12240       .addImm(31);
12241 
12242     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12243       .addImm(30);
12244   } else if (MI.getOpcode() == PPC::SETRND) {
12245     DebugLoc dl = MI.getDebugLoc();
12246 
12247     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12248     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12249     // If the target doesn't have DirectMove, we should use stack to do the
12250     // conversion, because the target doesn't have the instructions like mtvsrd
12251     // or mfvsrd to do this conversion directly.
12252     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12253       if (Subtarget.hasDirectMove()) {
12254         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12255           .addReg(SrcReg);
12256       } else {
12257         // Use stack to do the register copy.
12258         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12259         MachineRegisterInfo &RegInfo = F->getRegInfo();
12260         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12261         if (RC == &PPC::F8RCRegClass) {
12262           // Copy register from F8RCRegClass to G8RCRegclass.
12263           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12264                  "Unsupported RegClass.");
12265 
12266           StoreOp = PPC::STFD;
12267           LoadOp = PPC::LD;
12268         } else {
12269           // Copy register from G8RCRegClass to F8RCRegclass.
12270           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12271                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12272                  "Unsupported RegClass.");
12273         }
12274 
12275         MachineFrameInfo &MFI = F->getFrameInfo();
12276         int FrameIdx = MFI.CreateStackObject(8, 8, false);
12277 
12278         MachineMemOperand *MMOStore = F->getMachineMemOperand(
12279             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12280             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12281             MFI.getObjectAlign(FrameIdx));
12282 
12283         // Store the SrcReg into the stack.
12284         BuildMI(*BB, MI, dl, TII->get(StoreOp))
12285           .addReg(SrcReg)
12286           .addImm(0)
12287           .addFrameIndex(FrameIdx)
12288           .addMemOperand(MMOStore);
12289 
12290         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12291             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12292             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12293             MFI.getObjectAlign(FrameIdx));
12294 
12295         // Load from the stack where SrcReg is stored, and save to DestReg,
12296         // so we have done the RegClass conversion from RegClass::SrcReg to
12297         // RegClass::DestReg.
12298         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12299           .addImm(0)
12300           .addFrameIndex(FrameIdx)
12301           .addMemOperand(MMOLoad);
12302       }
12303     };
12304 
12305     Register OldFPSCRReg = MI.getOperand(0).getReg();
12306 
12307     // Save FPSCR value.
12308     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12309 
12310     // When the operand is gprc register, use two least significant bits of the
12311     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12312     //
12313     // copy OldFPSCRTmpReg, OldFPSCRReg
12314     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12315     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12316     // copy NewFPSCRReg, NewFPSCRTmpReg
12317     // mtfsf 255, NewFPSCRReg
12318     MachineOperand SrcOp = MI.getOperand(1);
12319     MachineRegisterInfo &RegInfo = F->getRegInfo();
12320     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12321 
12322     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12323 
12324     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12325     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12326 
12327     // The first operand of INSERT_SUBREG should be a register which has
12328     // subregisters, we only care about its RegClass, so we should use an
12329     // IMPLICIT_DEF register.
12330     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12331     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12332       .addReg(ImDefReg)
12333       .add(SrcOp)
12334       .addImm(1);
12335 
12336     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12337     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12338       .addReg(OldFPSCRTmpReg)
12339       .addReg(ExtSrcReg)
12340       .addImm(0)
12341       .addImm(62);
12342 
12343     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12344     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12345 
12346     // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12347     // bits of FPSCR.
12348     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12349       .addImm(255)
12350       .addReg(NewFPSCRReg)
12351       .addImm(0)
12352       .addImm(0);
12353   } else {
12354     llvm_unreachable("Unexpected instr type to insert");
12355   }
12356 
12357   MI.eraseFromParent(); // The pseudo instruction is gone now.
12358   return BB;
12359 }
12360 
12361 //===----------------------------------------------------------------------===//
12362 // Target Optimization Hooks
12363 //===----------------------------------------------------------------------===//
12364 
12365 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12366   // For the estimates, convergence is quadratic, so we essentially double the
12367   // number of digits correct after every iteration. For both FRE and FRSQRTE,
12368   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12369   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12370   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12371   if (VT.getScalarType() == MVT::f64)
12372     RefinementSteps++;
12373   return RefinementSteps;
12374 }
12375 
12376 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12377                                            int Enabled, int &RefinementSteps,
12378                                            bool &UseOneConstNR,
12379                                            bool Reciprocal) const {
12380   EVT VT = Operand.getValueType();
12381   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12382       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12383       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12384       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
12385       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
12386       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
12387     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12388       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12389 
12390     // The Newton-Raphson computation with a single constant does not provide
12391     // enough accuracy on some CPUs.
12392     UseOneConstNR = !Subtarget.needsTwoConstNR();
12393     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12394   }
12395   return SDValue();
12396 }
12397 
12398 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12399                                             int Enabled,
12400                                             int &RefinementSteps) const {
12401   EVT VT = Operand.getValueType();
12402   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12403       (VT == MVT::f64 && Subtarget.hasFRE()) ||
12404       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12405       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
12406       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
12407       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
12408     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12409       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12410     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12411   }
12412   return SDValue();
12413 }
12414 
12415 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12416   // Note: This functionality is used only when unsafe-fp-math is enabled, and
12417   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12418   // enabled for division), this functionality is redundant with the default
12419   // combiner logic (once the division -> reciprocal/multiply transformation
12420   // has taken place). As a result, this matters more for older cores than for
12421   // newer ones.
12422 
12423   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12424   // reciprocal if there are two or more FDIVs (for embedded cores with only
12425   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
12426   switch (Subtarget.getCPUDirective()) {
12427   default:
12428     return 3;
12429   case PPC::DIR_440:
12430   case PPC::DIR_A2:
12431   case PPC::DIR_E500:
12432   case PPC::DIR_E500mc:
12433   case PPC::DIR_E5500:
12434     return 2;
12435   }
12436 }
12437 
12438 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12439 // collapsed, and so we need to look through chains of them.
12440 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12441                                      int64_t& Offset, SelectionDAG &DAG) {
12442   if (DAG.isBaseWithConstantOffset(Loc)) {
12443     Base = Loc.getOperand(0);
12444     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12445 
12446     // The base might itself be a base plus an offset, and if so, accumulate
12447     // that as well.
12448     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12449   }
12450 }
12451 
// Return true if the access at address \p Loc (with memory type \p VT) lies
// exactly \p Dist accesses of \p Bytes each after the access made through
// \p Base; i.e. its address equals Base's address plus Dist*Bytes.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  // The access width must match the requested width exactly.
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    // Frame-index addresses: compare static frame-object offsets. Both
    // objects must have the same size, equal to the access width.
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  // General case: strip (base + constant) chains from both addresses and
  // compare the underlying bases plus accumulated constant offsets.
  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  // Last resort: match both addresses as global-value + offset and compare
  // against the same global.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}
12489 
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands. Returns true if node \p N is a
// memory access located exactly Dist accesses of Bytes each after \p Base.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  // Ordinary loads/stores carry their memory type and address directly.
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  // Load intrinsics: recover the memory type from the intrinsic ID
  // (operand 1); the address is operand 2.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  // Store intrinsics: same idea, but the address is operand 3 (operand 2 is
  // the value being stored).
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  // Not a memory access we know how to analyze.
  return false;
}
12599 
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  // Nodes just above the top-level loads and token factors, used as the
  // starting points for the second (downward) phase.
  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      // A memory node one access past LD means we're done.
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      // Token factors merge several chains; follow every operand.
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look
  // through all loads (just the chain uses) and token factors to find a
  // consecutive load.
  Visited.clear();
  Queue.clear();

  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
       IE = LoadRoots.end(); I != IE; ++I) {
    Queue.push_back(*I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      // Follow only memory nodes chained on LoadRoot and token factors;
      // anything else ends this branch of the downward walk.
      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
           UE = LoadRoot->use_end(); UI != UE; ++UI)
        if (((isa<MemSDNode>(*UI) &&
            cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
          Queue.push_back(*UI);
    }
  }

  return false;
}
12667 
/// This function is called when we have proved that a SETCC node can be replaced
/// by subtraction (and other supporting instructions) so that the result of
/// comparison is kept in a GPR instead of CR. This function is purely for
/// codegen purposes and has some flags to guide the codegen process.
/// \p Size is the bit width of the original (pre-extension) operands;
/// \p Complement XORs the final bit and \p Swap exchanges the operands —
/// both are selected by the caller according to the original condition code.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  // Zero extend the operands to the largest legal integer. Originally, they
  // must be of a strictly smaller size.
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
                         DAG.getConstant(Size, DL, MVT::i32));
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
                         DAG.getConstant(Size, DL, MVT::i32));

  // Swap if needed. Depends on the condition code.
  if (Swap)
    std::swap(Op0, Op1);

  // Subtract extended integers.
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

  // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the result of original comparison.
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
                             DAG.getConstant(Size - 1, DL, MVT::i32));
  auto Final = Shifted;

  // Complement the result if needed. Based on the condition code.
  if (Complement)
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
                        DAG.getConstant(1, DL, MVT::i64));

  // The SETCC result is an i1 value; truncate back down to it.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}
12703 
12704 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12705                                                   DAGCombinerInfo &DCI) const {
12706   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12707 
12708   SelectionDAG &DAG = DCI.DAG;
12709   SDLoc DL(N);
12710 
12711   // Size of integers being compared has a critical role in the following
12712   // analysis, so we prefer to do this when all types are legal.
12713   if (!DCI.isAfterLegalizeDAG())
12714     return SDValue();
12715 
12716   // If all users of SETCC extend its value to a legal integer type
12717   // then we replace SETCC with a subtraction
12718   for (SDNode::use_iterator UI = N->use_begin(),
12719        UE = N->use_end(); UI != UE; ++UI) {
12720     if (UI->getOpcode() != ISD::ZERO_EXTEND)
12721       return SDValue();
12722   }
12723 
12724   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12725   auto OpSize = N->getOperand(0).getValueSizeInBits();
12726 
12727   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12728 
12729   if (OpSize < Size) {
12730     switch (CC) {
12731     default: break;
12732     case ISD::SETULT:
12733       return generateEquivalentSub(N, Size, false, false, DL, DAG);
12734     case ISD::SETULE:
12735       return generateEquivalentSub(N, Size, true, true, DL, DAG);
12736     case ISD::SETUGT:
12737       return generateEquivalentSub(N, Size, false, true, DL, DAG);
12738     case ISD::SETUGE:
12739       return generateEquivalentSub(N, Size, true, false, DL, DAG);
12740     }
12741   }
12742 
12743   return SDValue();
12744 }
12745 
/// Try to keep boolean computations in CR bits. If N is an i1 truncation (or
/// a SETCC/SELECT_CC, which truncate implicitly) fed by a self-contained
/// cluster of bitwise operations (and/or/xor, selects, casts) whose leaf
/// inputs are all extensions from i1 or constants, re-issue the whole
/// cluster directly on i1 values so nothing is moved through GPRs.
/// Returns the replacement value, or an empty SDValue if no change is made.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  // Only 32- and 64-bit source values are handled here.
  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't matter the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed comparison: both operands must be fully sign-extended from
      // their low bit for a one-bit comparison to be equivalent.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned comparison: every bit above the first must be known zero.
      // If not, a SETCC may still be profitably lowered as a subtraction.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);

      if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // For comparisons, the second compared operand must satisfy the same
  // whitelist (TRUNCATE has no second operand to check).
  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // Inputs: leaf values (extensions from i1, or constants).
  // BinOps: worklist of intermediate operations still to be visited.
  // PromOps: all intermediate operations, to be re-issued on i1.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the worklists from N's operands: extensions from i1 and constants
  // are leaf inputs, anything else is a candidate intermediate operation.
  // A TRUNCATE has only one operand to consider.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  // Same self-containment check for the intermediate operations themselves.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Wrap the to-be-promoted nodes in handles so the ReplaceAllUsesOfValueWith
  // calls below cannot invalidate them while they wait on the list.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first to-be-promoted operand: selects keep their
    // condition (and SELECT_CC its two compared values) unpromoted.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
13026 
/// Counterpart of DAGCombineTruncBoolExt for extensions: if N is an
/// extension (to i32/i64) of a self-contained cluster of bitwise operations
/// whose leaf inputs are all truncations or constants, re-issue the cluster
/// at the wider result type so the values can stay in GPRs (or, for the
/// i1-source case with CR-bit tracking, avoid bouncing through CR bits).
/// Returns the replacement value, or an empty SDValue if no change is made.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  // The source must be either i1 (when tracking CR bits) or i32 on PPC64.
  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  // The extended value must come from a bitwise operation or a select.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  // Inputs: leaf values (truncations or constants).
  // BinOps: worklist of intermediate operations still to visit, seeded with
  // N's single operand. PromOps: all intermediate operations, to be
  // re-issued at the wider type.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  // Same self-containment check for the intermediate operations themselves.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  // Decide whether an explicit sign/zero extension is still required at the
  // end: only if some pre-truncation input isn't already extended as the
  // final extension demands.
  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Wrap the to-be-promoted nodes in handles so the ReplaceAllUsesOfValueWith
  // calls below cannot invalidate them while they wait on the list.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first to-be-promoted operand: selects keep their
    // condition (and SELECT_CC its two compared values) unpromoted.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  // To sign extend, shift the low bits up to the top and arithmetic-shift
  // them back down.
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}
13309 
13310 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13311                                         DAGCombinerInfo &DCI) const {
13312   assert(N->getOpcode() == ISD::SETCC &&
13313          "Should be called with a SETCC node");
13314 
13315   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13316   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13317     SDValue LHS = N->getOperand(0);
13318     SDValue RHS = N->getOperand(1);
13319 
13320     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13321     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13322         LHS.hasOneUse())
13323       std::swap(LHS, RHS);
13324 
13325     // x == 0-y --> x+y == 0
13326     // x != 0-y --> x+y != 0
13327     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13328         RHS.hasOneUse()) {
13329       SDLoc DL(N);
13330       SelectionDAG &DAG = DCI.DAG;
13331       EVT VT = N->getValueType(0);
13332       EVT OpVT = LHS.getValueType();
13333       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13334       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13335     }
13336   }
13337 
13338   return DAGCombineTruncBoolExt(N, DCI);
13339 }
13340 
13341 // Is this an extending load from an f32 to an f64?
13342 static bool isFPExtLoad(SDValue Op) {
13343   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13344     return LD->getExtensionType() == ISD::EXTLOAD &&
13345       Op.getValueType() == MVT::f64;
13346   return false;
13347 }
13348 
/// Reduces the number of fp-to-int conversion when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
/// single floating to integer conversion of the vector.
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCSISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    // Every operand must be an MFVSR of the *same* conversion opcode;
    // otherwise a single vector conversion cannot replace them all.
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
        return SDValue();
      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      // In: the FP source feeding each conversion (operand of the FCTI* node).
      SDValue In = N->getOperand(i).getOperand(0);
      if (Is32Bit) {
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
        // here, we know that all inputs are extending loads so this is safe).
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
                                      MVT::f32, In.getOperand(0),
                                      DAG.getIntPtrConstant(1, dl));
          Ops.push_back(Trunc);
        }
      } else
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
    }

    // Pick the signed/unsigned vector conversion matching the scalar ones.
    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    // Build the FP vector, then do one vector-wide fp-to-int conversion.
    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
    return DAG.getNode(Opcode, dl, TargetVT, BV);
  }
  return SDValue();
}
13436 
13437 /// Reduce the number of loads when building a vector.
13438 ///
13439 /// Building a vector out of multiple loads can be converted to a load
13440 /// of the vector type if the loads are consecutive. If the loads are
13441 /// consecutive but in descending order, a shuffle is added at the end
13442 /// to reorder the vector.
13443 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13444   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13445          "Should be called with a BUILD_VECTOR node");
13446 
13447   SDLoc dl(N);
13448 
13449   // Return early for non byte-sized type, as they can't be consecutive.
13450   if (!N->getValueType(0).getVectorElementType().isByteSized())
13451     return SDValue();
13452 
13453   bool InputsAreConsecutiveLoads = true;
13454   bool InputsAreReverseConsecutive = true;
13455   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13456   SDValue FirstInput = N->getOperand(0);
13457   bool IsRoundOfExtLoad = false;
13458 
13459   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13460       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13461     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13462     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13463   }
13464   // Not a build vector of (possibly fp_rounded) loads.
13465   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13466       N->getNumOperands() == 1)
13467     return SDValue();
13468 
13469   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13470     // If any inputs are fp_round(extload), they all must be.
13471     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13472       return SDValue();
13473 
13474     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13475       N->getOperand(i);
13476     if (NextInput.getOpcode() != ISD::LOAD)
13477       return SDValue();
13478 
13479     SDValue PreviousInput =
13480       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13481     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13482     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13483 
13484     // If any inputs are fp_round(extload), they all must be.
13485     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13486       return SDValue();
13487 
13488     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13489       InputsAreConsecutiveLoads = false;
13490     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13491       InputsAreReverseConsecutive = false;
13492 
13493     // Exit early if the loads are neither consecutive nor reverse consecutive.
13494     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13495       return SDValue();
13496   }
13497 
13498   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13499          "The loads cannot be both consecutive and reverse consecutive.");
13500 
13501   SDValue FirstLoadOp =
13502     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13503   SDValue LastLoadOp =
13504     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13505                        N->getOperand(N->getNumOperands()-1);
13506 
13507   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13508   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13509   if (InputsAreConsecutiveLoads) {
13510     assert(LD1 && "Input needs to be a LoadSDNode.");
13511     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13512                        LD1->getBasePtr(), LD1->getPointerInfo(),
13513                        LD1->getAlignment());
13514   }
13515   if (InputsAreReverseConsecutive) {
13516     assert(LDL && "Input needs to be a LoadSDNode.");
13517     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13518                                LDL->getBasePtr(), LDL->getPointerInfo(),
13519                                LDL->getAlignment());
13520     SmallVector<int, 16> Ops;
13521     for (int i = N->getNumOperands() - 1; i >= 0; i--)
13522       Ops.push_back(i);
13523 
13524     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13525                                 DAG.getUNDEF(N->getValueType(0)), Ops);
13526   }
13527   return SDValue();
13528 }
13529 
13530 // This function adds the required vector_shuffle needed to get
13531 // the elements of the vector extract in the correct position
13532 // as specified by the CorrectElems encoding.
13533 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13534                                       SDValue Input, uint64_t Elems,
13535                                       uint64_t CorrectElems) {
13536   SDLoc dl(N);
13537 
13538   unsigned NumElems = Input.getValueType().getVectorNumElements();
13539   SmallVector<int, 16> ShuffleMask(NumElems, -1);
13540 
13541   // Knowing the element indices being extracted from the original
13542   // vector and the order in which they're being inserted, just put
13543   // them at element indices required for the instruction.
13544   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13545     if (DAG.getDataLayout().isLittleEndian())
13546       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13547     else
13548       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13549     CorrectElems = CorrectElems >> 8;
13550     Elems = Elems >> 8;
13551   }
13552 
13553   SDValue Shuffle =
13554       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13555                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13556 
13557   EVT VT = N->getValueType(0);
13558   SDValue Conv = DAG.getBitcast(VT, Shuffle);
13559 
13560   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13561                                Input.getValueType().getVectorElementType(),
13562                                VT.getVectorNumElements());
13563   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13564                      DAG.getValueType(ExtVT));
13565 }
13566 
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create
// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC  byte->word
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8  byte->double word
  // For LE: the allowed indices are: 0x0,0x8
  // For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  // Elems accumulates one byte per operand, encoding the extract indices in
  // the same nibble layout as TargetElems above.
  uint64_t Elems = 0;
  int Index;
  SDValue Input;

  // Returns true if Op is a sign extension (SIGN_EXTEND, or SIGN_EXTEND_INREG
  // possibly fed by an ANY_EXTEND) of an EXTRACT_VECTOR_ELT with a constant
  // index, and every extract seen so far comes from the same source vector.
  // On success, appends the extract index as a byte into Elems.
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
      return false;

    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
    // of the right width.
    SDValue Extract = Op.getOperand(0);
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
      Extract = Extract.getOperand(0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    // All extracts must read from a single input vector.
    if (Input && Input != Extract.getOperand(0))
      return false;

    if (!Input)
      Input = Extract.getOperand(0);

    // Record the index: low nibble for LE, high nibble for BE, matching the
    // TargetElems encoding.
    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts,
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
  // The sum of the source and destination element widths uniquely identifies
  // the kind of extension (e.g. 8 + 32 == 40 is byte->word).
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  // Keep only the nibbles that are meaningful for this endianness before
  // comparing against what was actually extracted.
  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}
13665 
// Combine a BUILD_VECTOR on VSX subtargets by trying, in order:
//   1. folding element-wise fp-to-int conversions into one vector conversion
//      (combineElementTruncationToVectorTruncation);
//   2. turning consecutive loads into a single vector load
//      (combineBVOfConsecutiveLoads);
//   3. on P9 Altivec, recognizing sign-extended vector extracts
//      (combineBVOfVecSExt);
//   4. matching a v2f64 built from two [su]int_to_fp'd extracts of the same
//      v4i32 vector, which maps to PPCISD::[SU]INT_VEC_TO_FP.
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
    if (Reduced)
      return Reduced;
  }

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
    Reduced = combineBVOfVecSExt(N, DAG);
    if (Reduced)
      return Reduced;
  }

  // The remaining pattern only applies to v2f64 results.
  if (N->getValueType(0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
      FirstInput.getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  // Both conversions must have the same signedness.
  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(0);
  SDValue Ext2 = N->getOperand(1).getOperand(0);
  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  // Both extracts must use constant indices and read the same v4i32 source.
  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
      Ext1.getOperand(0) != Ext2.getOperand(0))
    return SDValue();

  // The pair must form one aligned half of the v4i32 source; which half it is
  // selects the (endian-dependent) subvector index for the conversion node.
  int FirstElem = Ext1Op->getZExtValue();
  int SecondElem = Ext2Op->getZExtValue();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
  else
    return SDValue();

  SDValue SrcVec = Ext1.getOperand(0);
  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64,
                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
}
13748 
// DAG combine for [SU]INT_TO_FP nodes. Two patterns are handled:
//   1. On P9, an i8/i16 load feeding the conversion is lowered to LXSIZX
//      (plus VEXTS for the signed case) so the value goes straight into a
//      VSR without a GPR round trip.
//   2. An fp -> int -> fp round trip is rewritten as FCTI*Z + FCFID* so no
//      store/load sequence through memory is needed.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // Pattern 1: a sub-word (i8/i16) load being converted to FP.
  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // Width of the loaded value in bytes (1 or 2), passed to LXSIZX/VEXTS.
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }


  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value are undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    // Widen an f32 source to f64 so a single FCTI*Z form covers it.
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT there is no single-precision FCFID, so round the f64
    // result down to f32 explicitly.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
13852 
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();

  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
  // aligned and the type is a vector with elements up to 4 bytes
  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
      VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  // Emit a doubleword-element load followed by an element swap so the
  // register contents end up in big-endian element order.
  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(MVT::v2f64, MVT::Other),
                                         LoadOps, MVT::v2f64, MMO);

  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(
      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
    DCI.AddToWorklist(N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
                       N, Swap.getValue(1));
  }

  return Swap;
}
13920 
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  // Operand index of the value being stored (differs between a plain STORE
  // node and the intrinsic form).
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
  // aligned and the type is a vector with elements up to 4 bytes
  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
      VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  // All stores are done as v2f64 and possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
    DCI.AddToWorklist(Src.getNode());
  }

  // Swap the doubleword elements first, then store with STXVD2X so memory
  // ends up in the expected (big-endian element) order.
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}
13986 
// Handle DAG combine for STORE (FP_TO_INT F).
// Rewrites the store of an fp-to-int conversion as a direct
// store-from-VSR (PPCISD::ST_VSR_SCAL_INT), so the converted value
// never has to move through a GPR.
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  // Val is the FP source of the conversion; Op1VT is the integer type being
  // stored; ResVT is the FP type of the source.
  SDValue Val = N->getOperand(1).getOperand(0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  // Floating point types smaller than 32 bits are not legal on Power.
  if (ResVT.getScalarSizeInBits() < 32)
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
        (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  // Extend f32 values to f64
  if (ResVT.getScalarSizeInBits() == 32) {
    Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
    DCI.AddToWorklist(Val.getNode());
  }

  // Set signed or unsigned conversion opcode.
  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
                          PPCISD::FP_TO_SINT_IN_VSR :
                          PPCISD::FP_TO_UINT_IN_VSR;

  Val = DAG.getNode(ConvOpcode,
                    dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
  DCI.AddToWorklist(Val.getNode());

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  // Operands: chain, value-in-VSR, address, byte width, original store type.
  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
                    DAG.getIntPtrConstant(ByteSize, dl, false),
                    DAG.getValueType(Op1VT) };

  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
          DAG.getVTList(MVT::Other), Ops,
          cast<StoreSDNode>(N)->getMemoryVT(),
          cast<StoreSDNode>(N)->getMemOperand());

  DCI.AddToWorklist(Val.getNode());
  return Val;
}
14044 
14045 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14046                                                 LSBaseSDNode *LSBase,
14047                                                 DAGCombinerInfo &DCI) const {
14048   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14049         "Not a reverse memop pattern!");
14050 
14051   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14052     auto Mask = SVN->getMask();
14053     int i = 0;
14054     auto I = Mask.rbegin();
14055     auto E = Mask.rend();
14056 
14057     for (; I != E; ++I) {
14058       if (*I != i)
14059         return false;
14060       i++;
14061     }
14062     return true;
14063   };
14064 
14065   SelectionDAG &DAG = DCI.DAG;
14066   EVT VT = SVN->getValueType(0);
14067 
14068   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14069     return SDValue();
14070 
14071   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
14072   // See comment in PPCVSXSwapRemoval.cpp.
14073   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
14074   if (!Subtarget.hasP9Vector())
14075     return SDValue();
14076 
14077   if(!IsElementReverse(SVN))
14078     return SDValue();
14079 
14080   if (LSBase->getOpcode() == ISD::LOAD) {
14081     SDLoc dl(SVN);
14082     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14083     return DAG.getMemIntrinsicNode(
14084         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14085         LSBase->getMemoryVT(), LSBase->getMemOperand());
14086   }
14087 
14088   if (LSBase->getOpcode() == ISD::STORE) {
14089     SDLoc dl(LSBase);
14090     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14091                           LSBase->getBasePtr()};
14092     return DAG.getMemIntrinsicNode(
14093         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14094         LSBase->getMemoryVT(), LSBase->getMemOperand());
14095   }
14096 
14097   llvm_unreachable("Expected a load or store node here");
14098 }
14099 
14100 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14101                                              DAGCombinerInfo &DCI) const {
14102   SelectionDAG &DAG = DCI.DAG;
14103   SDLoc dl(N);
14104   switch (N->getOpcode()) {
14105   default: break;
14106   case ISD::ADD:
14107     return combineADD(N, DCI);
14108   case ISD::SHL:
14109     return combineSHL(N, DCI);
14110   case ISD::SRA:
14111     return combineSRA(N, DCI);
14112   case ISD::SRL:
14113     return combineSRL(N, DCI);
14114   case ISD::MUL:
14115     return combineMUL(N, DCI);
14116   case PPCISD::SHL:
14117     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14118         return N->getOperand(0);
14119     break;
14120   case PPCISD::SRL:
14121     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14122         return N->getOperand(0);
14123     break;
14124   case PPCISD::SRA:
14125     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14126       if (C->isNullValue() ||   //  0 >>s V -> 0.
14127           C->isAllOnesValue())    // -1 >>s V -> -1.
14128         return N->getOperand(0);
14129     }
14130     break;
14131   case ISD::SIGN_EXTEND:
14132   case ISD::ZERO_EXTEND:
14133   case ISD::ANY_EXTEND:
14134     return DAGCombineExtBoolTrunc(N, DCI);
14135   case ISD::TRUNCATE:
14136     return combineTRUNCATE(N, DCI);
14137   case ISD::SETCC:
14138     if (SDValue CSCC = combineSetCC(N, DCI))
14139       return CSCC;
14140     LLVM_FALLTHROUGH;
14141   case ISD::SELECT_CC:
14142     return DAGCombineTruncBoolExt(N, DCI);
14143   case ISD::SINT_TO_FP:
14144   case ISD::UINT_TO_FP:
14145     return combineFPToIntToFP(N, DCI);
14146   case ISD::VECTOR_SHUFFLE:
14147     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14148       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14149       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14150     }
14151     break;
14152   case ISD::STORE: {
14153 
14154     EVT Op1VT = N->getOperand(1).getValueType();
14155     unsigned Opcode = N->getOperand(1).getOpcode();
14156 
14157     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14158       SDValue Val= combineStoreFPToInt(N, DCI);
14159       if (Val)
14160         return Val;
14161     }
14162 
14163     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14164       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14165       SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14166       if (Val)
14167         return Val;
14168     }
14169 
14170     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14171     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14172         N->getOperand(1).getNode()->hasOneUse() &&
14173         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14174          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14175 
14176       // STBRX can only handle simple types and it makes no sense to store less
14177       // two bytes in byte-reversed order.
14178       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14179       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14180         break;
14181 
14182       SDValue BSwapOp = N->getOperand(1).getOperand(0);
14183       // Do an any-extend to 32-bits if this is a half-word input.
14184       if (BSwapOp.getValueType() == MVT::i16)
14185         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14186 
14187       // If the type of BSWAP operand is wider than stored memory width
14188       // it need to be shifted to the right side before STBRX.
14189       if (Op1VT.bitsGT(mVT)) {
14190         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14191         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14192                               DAG.getConstant(Shift, dl, MVT::i32));
14193         // Need to truncate if this is a bswap of i64 stored as i32/i16.
14194         if (Op1VT == MVT::i64)
14195           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14196       }
14197 
14198       SDValue Ops[] = {
14199         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14200       };
14201       return
14202         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14203                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14204                                 cast<StoreSDNode>(N)->getMemOperand());
14205     }
14206 
14207     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
14208     // So it can increase the chance of CSE constant construction.
14209     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14210         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14211       // Need to sign-extended to 64-bits to handle negative values.
14212       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14213       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14214                                     MemVT.getSizeInBits());
14215       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14216 
14217       // DAG.getTruncStore() can't be used here because it doesn't accept
14218       // the general (base + offset) addressing mode.
14219       // So we use UpdateNodeOperands and setTruncatingStore instead.
14220       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14221                              N->getOperand(3));
14222       cast<StoreSDNode>(N)->setTruncatingStore(true);
14223       return SDValue(N, 0);
14224     }
14225 
14226     // For little endian, VSX stores require generating xxswapd/lxvd2x.
14227     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14228     if (Op1VT.isSimple()) {
14229       MVT StoreVT = Op1VT.getSimpleVT();
14230       if (Subtarget.needsSwapsForVSXMemOps() &&
14231           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14232            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14233         return expandVSXStoreForLE(N, DCI);
14234     }
14235     break;
14236   }
14237   case ISD::LOAD: {
14238     LoadSDNode *LD = cast<LoadSDNode>(N);
14239     EVT VT = LD->getValueType(0);
14240 
14241     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14242     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14243     if (VT.isSimple()) {
14244       MVT LoadVT = VT.getSimpleVT();
14245       if (Subtarget.needsSwapsForVSXMemOps() &&
14246           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14247            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14248         return expandVSXLoadForLE(N, DCI);
14249     }
14250 
14251     // We sometimes end up with a 64-bit integer load, from which we extract
14252     // two single-precision floating-point numbers. This happens with
14253     // std::complex<float>, and other similar structures, because of the way we
14254     // canonicalize structure copies. However, if we lack direct moves,
14255     // then the final bitcasts from the extracted integer values to the
14256     // floating-point numbers turn into store/load pairs. Even with direct moves,
14257     // just loading the two floating-point numbers is likely better.
14258     auto ReplaceTwoFloatLoad = [&]() {
14259       if (VT != MVT::i64)
14260         return false;
14261 
14262       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14263           LD->isVolatile())
14264         return false;
14265 
14266       //  We're looking for a sequence like this:
14267       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14268       //      t16: i64 = srl t13, Constant:i32<32>
14269       //    t17: i32 = truncate t16
14270       //  t18: f32 = bitcast t17
14271       //    t19: i32 = truncate t13
14272       //  t20: f32 = bitcast t19
14273 
14274       if (!LD->hasNUsesOfValue(2, 0))
14275         return false;
14276 
14277       auto UI = LD->use_begin();
14278       while (UI.getUse().getResNo() != 0) ++UI;
14279       SDNode *Trunc = *UI++;
14280       while (UI.getUse().getResNo() != 0) ++UI;
14281       SDNode *RightShift = *UI;
14282       if (Trunc->getOpcode() != ISD::TRUNCATE)
14283         std::swap(Trunc, RightShift);
14284 
14285       if (Trunc->getOpcode() != ISD::TRUNCATE ||
14286           Trunc->getValueType(0) != MVT::i32 ||
14287           !Trunc->hasOneUse())
14288         return false;
14289       if (RightShift->getOpcode() != ISD::SRL ||
14290           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14291           RightShift->getConstantOperandVal(1) != 32 ||
14292           !RightShift->hasOneUse())
14293         return false;
14294 
14295       SDNode *Trunc2 = *RightShift->use_begin();
14296       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14297           Trunc2->getValueType(0) != MVT::i32 ||
14298           !Trunc2->hasOneUse())
14299         return false;
14300 
14301       SDNode *Bitcast = *Trunc->use_begin();
14302       SDNode *Bitcast2 = *Trunc2->use_begin();
14303 
14304       if (Bitcast->getOpcode() != ISD::BITCAST ||
14305           Bitcast->getValueType(0) != MVT::f32)
14306         return false;
14307       if (Bitcast2->getOpcode() != ISD::BITCAST ||
14308           Bitcast2->getValueType(0) != MVT::f32)
14309         return false;
14310 
14311       if (Subtarget.isLittleEndian())
14312         std::swap(Bitcast, Bitcast2);
14313 
14314       // Bitcast has the second float (in memory-layout order) and Bitcast2
14315       // has the first one.
14316 
14317       SDValue BasePtr = LD->getBasePtr();
14318       if (LD->isIndexed()) {
14319         assert(LD->getAddressingMode() == ISD::PRE_INC &&
14320                "Non-pre-inc AM on PPC?");
14321         BasePtr =
14322           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14323                       LD->getOffset());
14324       }
14325 
14326       auto MMOFlags =
14327           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14328       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14329                                       LD->getPointerInfo(), LD->getAlignment(),
14330                                       MMOFlags, LD->getAAInfo());
14331       SDValue AddPtr =
14332         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14333                     BasePtr, DAG.getIntPtrConstant(4, dl));
14334       SDValue FloatLoad2 = DAG.getLoad(
14335           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14336           LD->getPointerInfo().getWithOffset(4),
14337           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14338 
14339       if (LD->isIndexed()) {
14340         // Note that DAGCombine should re-form any pre-increment load(s) from
14341         // what is produced here if that makes sense.
14342         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14343       }
14344 
14345       DCI.CombineTo(Bitcast2, FloatLoad);
14346       DCI.CombineTo(Bitcast, FloatLoad2);
14347 
14348       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14349                                     SDValue(FloatLoad2.getNode(), 1));
14350       return true;
14351     };
14352 
14353     if (ReplaceTwoFloatLoad())
14354       return SDValue(N, 0);
14355 
14356     EVT MemVT = LD->getMemoryVT();
14357     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14358     unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
14359     Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
14360     unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
14361     if (LD->isUnindexed() && VT.isVector() &&
14362         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14363           // P8 and later hardware should just use LOAD.
14364           !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
14365                                        VT == MVT::v4i32 || VT == MVT::v4f32)) ||
14366          (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
14367           LD->getAlignment() >= ScalarABIAlignment)) &&
14368         LD->getAlignment() < ABIAlignment) {
14369       // This is a type-legal unaligned Altivec or QPX load.
14370       SDValue Chain = LD->getChain();
14371       SDValue Ptr = LD->getBasePtr();
14372       bool isLittleEndian = Subtarget.isLittleEndian();
14373 
14374       // This implements the loading of unaligned vectors as described in
14375       // the venerable Apple Velocity Engine overview. Specifically:
14376       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14377       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14378       //
14379       // The general idea is to expand a sequence of one or more unaligned
14380       // loads into an alignment-based permutation-control instruction (lvsl
14381       // or lvsr), a series of regular vector loads (which always truncate
14382       // their input address to an aligned address), and a series of
14383       // permutations.  The results of these permutations are the requested
14384       // loaded values.  The trick is that the last "extra" load is not taken
14385       // from the address you might suspect (sizeof(vector) bytes after the
14386       // last requested load), but rather sizeof(vector) - 1 bytes after the
14387       // last requested vector. The point of this is to avoid a page fault if
14388       // the base address happened to be aligned. This works because if the
14389       // base address is aligned, then adding less than a full vector length
14390       // will cause the last vector in the sequence to be (re)loaded.
14391       // Otherwise, the next vector will be fetched as you might suspect was
14392       // necessary.
14393 
14394       // We might be able to reuse the permutation generation from
14395       // a different base address offset from this one by an aligned amount.
14396       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14397       // optimization later.
14398       Intrinsic::ID Intr, IntrLD, IntrPerm;
14399       MVT PermCntlTy, PermTy, LDTy;
14400       if (Subtarget.hasAltivec()) {
14401         Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
14402                                  Intrinsic::ppc_altivec_lvsl;
14403         IntrLD = Intrinsic::ppc_altivec_lvx;
14404         IntrPerm = Intrinsic::ppc_altivec_vperm;
14405         PermCntlTy = MVT::v16i8;
14406         PermTy = MVT::v4i32;
14407         LDTy = MVT::v4i32;
14408       } else {
14409         Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
14410                                        Intrinsic::ppc_qpx_qvlpcls;
14411         IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
14412                                        Intrinsic::ppc_qpx_qvlfs;
14413         IntrPerm = Intrinsic::ppc_qpx_qvfperm;
14414         PermCntlTy = MVT::v4f64;
14415         PermTy = MVT::v4f64;
14416         LDTy = MemVT.getSimpleVT();
14417       }
14418 
14419       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14420 
14421       // Create the new MMO for the new base load. It is like the original MMO,
14422       // but represents an area in memory almost twice the vector size centered
14423       // on the original address. If the address is unaligned, we might start
14424       // reading up to (sizeof(vector)-1) bytes below the address of the
14425       // original unaligned load.
14426       MachineFunction &MF = DAG.getMachineFunction();
14427       MachineMemOperand *BaseMMO =
14428         MF.getMachineMemOperand(LD->getMemOperand(),
14429                                 -(long)MemVT.getStoreSize()+1,
14430                                 2*MemVT.getStoreSize()-1);
14431 
14432       // Create the new base load.
14433       SDValue LDXIntID =
14434           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14435       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14436       SDValue BaseLoad =
14437         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14438                                 DAG.getVTList(PermTy, MVT::Other),
14439                                 BaseLoadOps, LDTy, BaseMMO);
14440 
14441       // Note that the value of IncOffset (which is provided to the next
14442       // load's pointer info offset value, and thus used to calculate the
14443       // alignment), and the value of IncValue (which is actually used to
14444       // increment the pointer value) are different! This is because we
14445       // require the next load to appear to be aligned, even though it
14446       // is actually offset from the base pointer by a lesser amount.
14447       int IncOffset = VT.getSizeInBits() / 8;
14448       int IncValue = IncOffset;
14449 
14450       // Walk (both up and down) the chain looking for another load at the real
14451       // (aligned) offset (the alignment of the other load does not matter in
14452       // this case). If found, then do not use the offset reduction trick, as
14453       // that will prevent the loads from being later combined (as they would
14454       // otherwise be duplicates).
14455       if (!findConsecutiveLoad(LD, DAG))
14456         --IncValue;
14457 
14458       SDValue Increment =
14459           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14460       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14461 
14462       MachineMemOperand *ExtraMMO =
14463         MF.getMachineMemOperand(LD->getMemOperand(),
14464                                 1, 2*MemVT.getStoreSize()-1);
14465       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14466       SDValue ExtraLoad =
14467         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14468                                 DAG.getVTList(PermTy, MVT::Other),
14469                                 ExtraLoadOps, LDTy, ExtraMMO);
14470 
14471       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14472         BaseLoad.getValue(1), ExtraLoad.getValue(1));
14473 
14474       // Because vperm has a big-endian bias, we must reverse the order
14475       // of the input vectors and complement the permute control vector
14476       // when generating little endian code.  We have already handled the
14477       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14478       // and ExtraLoad here.
14479       SDValue Perm;
14480       if (isLittleEndian)
14481         Perm = BuildIntrinsicOp(IntrPerm,
14482                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14483       else
14484         Perm = BuildIntrinsicOp(IntrPerm,
14485                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14486 
14487       if (VT != PermTy)
14488         Perm = Subtarget.hasAltivec() ?
14489                  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
14490                  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
14491                                DAG.getTargetConstant(1, dl, MVT::i64));
14492                                // second argument is 1 because this rounding
14493                                // is always exact.
14494 
14495       // The output of the permutation is our loaded result, the TokenFactor is
14496       // our new chain.
14497       DCI.CombineTo(N, Perm, TF);
14498       return SDValue(N, 0);
14499     }
14500     }
14501     break;
14502     case ISD::INTRINSIC_WO_CHAIN: {
14503       bool isLittleEndian = Subtarget.isLittleEndian();
14504       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
14505       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14506                                            : Intrinsic::ppc_altivec_lvsl);
14507       if ((IID == Intr ||
14508            IID == Intrinsic::ppc_qpx_qvlpcld  ||
14509            IID == Intrinsic::ppc_qpx_qvlpcls) &&
14510         N->getOperand(1)->getOpcode() == ISD::ADD) {
14511         SDValue Add = N->getOperand(1);
14512 
14513         int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
14514                    5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
14515 
14516         if (DAG.MaskedValueIsZero(Add->getOperand(1),
14517                                   APInt::getAllOnesValue(Bits /* alignment */)
14518                                       .zext(Add.getScalarValueSizeInBits()))) {
14519           SDNode *BasePtr = Add->getOperand(0).getNode();
14520           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14521                                     UE = BasePtr->use_end();
14522                UI != UE; ++UI) {
14523             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14524                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
14525               // We've found another LVSL/LVSR, and this address is an aligned
14526               // multiple of that one. The results will be the same, so use the
14527               // one we've just found instead.
14528 
14529               return SDValue(*UI, 0);
14530             }
14531           }
14532         }
14533 
14534         if (isa<ConstantSDNode>(Add->getOperand(1))) {
14535           SDNode *BasePtr = Add->getOperand(0).getNode();
14536           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14537                UE = BasePtr->use_end(); UI != UE; ++UI) {
14538             if (UI->getOpcode() == ISD::ADD &&
14539                 isa<ConstantSDNode>(UI->getOperand(1)) &&
14540                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
14541                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
14542                 (1ULL << Bits) == 0) {
14543               SDNode *OtherAdd = *UI;
14544               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
14545                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
14546                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14547                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
14548                   return SDValue(*VI, 0);
14549                 }
14550               }
14551             }
14552           }
14553         }
14554       }
14555 
14556       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
14557       // Expose the vabsduw/h/b opportunity for down stream
14558       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
14559           (IID == Intrinsic::ppc_altivec_vmaxsw ||
14560            IID == Intrinsic::ppc_altivec_vmaxsh ||
14561            IID == Intrinsic::ppc_altivec_vmaxsb)) {
14562         SDValue V1 = N->getOperand(1);
14563         SDValue V2 = N->getOperand(2);
14564         if ((V1.getSimpleValueType() == MVT::v4i32 ||
14565              V1.getSimpleValueType() == MVT::v8i16 ||
14566              V1.getSimpleValueType() == MVT::v16i8) &&
14567             V1.getSimpleValueType() == V2.getSimpleValueType()) {
14568           // (0-a, a)
14569           if (V1.getOpcode() == ISD::SUB &&
14570               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
14571               V1.getOperand(1) == V2) {
14572             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
14573           }
14574           // (a, 0-a)
14575           if (V2.getOpcode() == ISD::SUB &&
14576               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
14577               V2.getOperand(1) == V1) {
14578             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14579           }
14580           // (x-y, y-x)
14581           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
14582               V1.getOperand(0) == V2.getOperand(1) &&
14583               V1.getOperand(1) == V2.getOperand(0)) {
14584             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14585           }
14586         }
14587       }
14588     }
14589 
14590     break;
14591   case ISD::INTRINSIC_W_CHAIN:
14592     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14593     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14594     if (Subtarget.needsSwapsForVSXMemOps()) {
14595       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14596       default:
14597         break;
14598       case Intrinsic::ppc_vsx_lxvw4x:
14599       case Intrinsic::ppc_vsx_lxvd2x:
14600         return expandVSXLoadForLE(N, DCI);
14601       }
14602     }
14603     break;
14604   case ISD::INTRINSIC_VOID:
14605     // For little endian, VSX stores require generating xxswapd/stxvd2x.
14606     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14607     if (Subtarget.needsSwapsForVSXMemOps()) {
14608       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14609       default:
14610         break;
14611       case Intrinsic::ppc_vsx_stxvw4x:
14612       case Intrinsic::ppc_vsx_stxvd2x:
14613         return expandVSXStoreForLE(N, DCI);
14614       }
14615     }
14616     break;
14617   case ISD::BSWAP:
14618     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
14619     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
14620         N->getOperand(0).hasOneUse() &&
14621         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
14622          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
14623           N->getValueType(0) == MVT::i64))) {
14624       SDValue Load = N->getOperand(0);
14625       LoadSDNode *LD = cast<LoadSDNode>(Load);
14626       // Create the byte-swapping load.
14627       SDValue Ops[] = {
14628         LD->getChain(),    // Chain
14629         LD->getBasePtr(),  // Ptr
14630         DAG.getValueType(N->getValueType(0)) // VT
14631       };
14632       SDValue BSLoad =
14633         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
14634                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
14635                                               MVT::i64 : MVT::i32, MVT::Other),
14636                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
14637 
14638       // If this is an i16 load, insert the truncate.
14639       SDValue ResVal = BSLoad;
14640       if (N->getValueType(0) == MVT::i16)
14641         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
14642 
14643       // First, combine the bswap away.  This makes the value produced by the
14644       // load dead.
14645       DCI.CombineTo(N, ResVal);
14646 
14647       // Next, combine the load away, we give it a bogus result value but a real
14648       // chain result.  The result value is dead because the bswap is dead.
14649       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
14650 
14651       // Return N so it doesn't get rechecked!
14652       return SDValue(N, 0);
14653     }
14654     break;
14655   case PPCISD::VCMP:
14656     // If a VCMPo node already exists with exactly the same operands as this
14657     // node, use its result instead of this node (VCMPo computes both a CR6 and
14658     // a normal output).
14659     //
14660     if (!N->getOperand(0).hasOneUse() &&
14661         !N->getOperand(1).hasOneUse() &&
14662         !N->getOperand(2).hasOneUse()) {
14663 
14664       // Scan all of the users of the LHS, looking for VCMPo's that match.
14665       SDNode *VCMPoNode = nullptr;
14666 
14667       SDNode *LHSN = N->getOperand(0).getNode();
14668       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
14669            UI != E; ++UI)
14670         if (UI->getOpcode() == PPCISD::VCMPo &&
14671             UI->getOperand(1) == N->getOperand(1) &&
14672             UI->getOperand(2) == N->getOperand(2) &&
14673             UI->getOperand(0) == N->getOperand(0)) {
14674           VCMPoNode = *UI;
14675           break;
14676         }
14677 
14678       // If there is no VCMPo node, or if the flag value has a single use, don't
14679       // transform this.
14680       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
14681         break;
14682 
14683       // Look at the (necessarily single) use of the flag value.  If it has a
14684       // chain, this transformation is more complex.  Note that multiple things
14685       // could use the value result, which we should ignore.
14686       SDNode *FlagUser = nullptr;
14687       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
14688            FlagUser == nullptr; ++UI) {
14689         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
14690         SDNode *User = *UI;
14691         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
14692           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
14693             FlagUser = User;
14694             break;
14695           }
14696         }
14697       }
14698 
14699       // If the user is a MFOCRF instruction, we know this is safe.
14700       // Otherwise we give up for right now.
14701       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
14702         return SDValue(VCMPoNode, 0);
14703     }
14704     break;
14705   case ISD::BRCOND: {
14706     SDValue Cond = N->getOperand(1);
14707     SDValue Target = N->getOperand(2);
14708 
14709     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14710         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
14711           Intrinsic::loop_decrement) {
14712 
14713       // We now need to make the intrinsic dead (it cannot be instruction
14714       // selected).
14715       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
14716       assert(Cond.getNode()->hasOneUse() &&
14717              "Counter decrement has more than one use");
14718 
14719       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
14720                          N->getOperand(0), Target);
14721     }
14722   }
14723   break;
14724   case ISD::BR_CC: {
14725     // If this is a branch on an altivec predicate comparison, lower this so
14726     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
14727     // lowering is done pre-legalize, because the legalizer lowers the predicate
14728     // compare down to code that is difficult to reassemble.
14729     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
14730     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
14731 
14732     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
14733     // value. If so, pass-through the AND to get to the intrinsic.
14734     if (LHS.getOpcode() == ISD::AND &&
14735         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14736         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
14737           Intrinsic::loop_decrement &&
14738         isa<ConstantSDNode>(LHS.getOperand(1)) &&
14739         !isNullConstant(LHS.getOperand(1)))
14740       LHS = LHS.getOperand(0);
14741 
14742     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14743         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
14744           Intrinsic::loop_decrement &&
14745         isa<ConstantSDNode>(RHS)) {
14746       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
14747              "Counter decrement comparison is not EQ or NE");
14748 
14749       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14750       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
14751                     (CC == ISD::SETNE && !Val);
14752 
14753       // We now need to make the intrinsic dead (it cannot be instruction
14754       // selected).
14755       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
14756       assert(LHS.getNode()->hasOneUse() &&
14757              "Counter decrement has more than one use");
14758 
14759       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
14760                          N->getOperand(0), N->getOperand(4));
14761     }
14762 
14763     int CompareOpc;
14764     bool isDot;
14765 
14766     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14767         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
14768         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
14769       assert(isDot && "Can't compare against a vector result!");
14770 
14771       // If this is a comparison against something other than 0/1, then we know
14772       // that the condition is never/always true.
14773       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14774       if (Val != 0 && Val != 1) {
14775         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
14776           return N->getOperand(0);
14777         // Always !=, turn it into an unconditional branch.
14778         return DAG.getNode(ISD::BR, dl, MVT::Other,
14779                            N->getOperand(0), N->getOperand(4));
14780       }
14781 
14782       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
14783 
14784       // Create the PPCISD altivec 'dot' comparison node.
14785       SDValue Ops[] = {
14786         LHS.getOperand(2),  // LHS of compare
14787         LHS.getOperand(3),  // RHS of compare
14788         DAG.getConstant(CompareOpc, dl, MVT::i32)
14789       };
14790       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
14791       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
14792 
14793       // Unpack the result based on how the target uses it.
14794       PPC::Predicate CompOpc;
14795       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
14796       default:  // Can't happen, don't crash on invalid number though.
14797       case 0:   // Branch on the value of the EQ bit of CR6.
14798         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
14799         break;
14800       case 1:   // Branch on the inverted value of the EQ bit of CR6.
14801         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
14802         break;
14803       case 2:   // Branch on the value of the LT bit of CR6.
14804         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
14805         break;
14806       case 3:   // Branch on the inverted value of the LT bit of CR6.
14807         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
14808         break;
14809       }
14810 
14811       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
14812                          DAG.getConstant(CompOpc, dl, MVT::i32),
14813                          DAG.getRegister(PPC::CR6, MVT::i32),
14814                          N->getOperand(4), CompNode.getValue(1));
14815     }
14816     break;
14817   }
14818   case ISD::BUILD_VECTOR:
14819     return DAGCombineBuildVector(N, DCI);
14820   case ISD::ABS:
14821     return combineABS(N, DCI);
14822   case ISD::VSELECT:
14823     return combineVSelect(N, DCI);
14824   }
14825 
14826   return SDValue();
14827 }
14828 
14829 SDValue
14830 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
14831                                  SelectionDAG &DAG,
14832                                  SmallVectorImpl<SDNode *> &Created) const {
14833   // fold (sdiv X, pow2)
14834   EVT VT = N->getValueType(0);
14835   if (VT == MVT::i64 && !Subtarget.isPPC64())
14836     return SDValue();
14837   if ((VT != MVT::i32 && VT != MVT::i64) ||
14838       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
14839     return SDValue();
14840 
14841   SDLoc DL(N);
14842   SDValue N0 = N->getOperand(0);
14843 
14844   bool IsNegPow2 = (-Divisor).isPowerOf2();
14845   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
14846   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
14847 
14848   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
14849   Created.push_back(Op.getNode());
14850 
14851   if (IsNegPow2) {
14852     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
14853     Created.push_back(Op.getNode());
14854   }
14855 
14856   return Op;
14857 }
14858 
14859 //===----------------------------------------------------------------------===//
14860 // Inline Assembly Support
14861 //===----------------------------------------------------------------------===//
14862 
14863 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
14864                                                       KnownBits &Known,
14865                                                       const APInt &DemandedElts,
14866                                                       const SelectionDAG &DAG,
14867                                                       unsigned Depth) const {
14868   Known.resetAll();
14869   switch (Op.getOpcode()) {
14870   default: break;
14871   case PPCISD::LBRX: {
14872     // lhbrx is known to have the top bits cleared out.
14873     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
14874       Known.Zero = 0xFFFF0000;
14875     break;
14876   }
14877   case ISD::INTRINSIC_WO_CHAIN: {
14878     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
14879     default: break;
14880     case Intrinsic::ppc_altivec_vcmpbfp_p:
14881     case Intrinsic::ppc_altivec_vcmpeqfp_p:
14882     case Intrinsic::ppc_altivec_vcmpequb_p:
14883     case Intrinsic::ppc_altivec_vcmpequh_p:
14884     case Intrinsic::ppc_altivec_vcmpequw_p:
14885     case Intrinsic::ppc_altivec_vcmpequd_p:
14886     case Intrinsic::ppc_altivec_vcmpgefp_p:
14887     case Intrinsic::ppc_altivec_vcmpgtfp_p:
14888     case Intrinsic::ppc_altivec_vcmpgtsb_p:
14889     case Intrinsic::ppc_altivec_vcmpgtsh_p:
14890     case Intrinsic::ppc_altivec_vcmpgtsw_p:
14891     case Intrinsic::ppc_altivec_vcmpgtsd_p:
14892     case Intrinsic::ppc_altivec_vcmpgtub_p:
14893     case Intrinsic::ppc_altivec_vcmpgtuh_p:
14894     case Intrinsic::ppc_altivec_vcmpgtuw_p:
14895     case Intrinsic::ppc_altivec_vcmpgtud_p:
14896       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
14897       break;
14898     }
14899   }
14900   }
14901 }
14902 
14903 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
14904   switch (Subtarget.getCPUDirective()) {
14905   default: break;
14906   case PPC::DIR_970:
14907   case PPC::DIR_PWR4:
14908   case PPC::DIR_PWR5:
14909   case PPC::DIR_PWR5X:
14910   case PPC::DIR_PWR6:
14911   case PPC::DIR_PWR6X:
14912   case PPC::DIR_PWR7:
14913   case PPC::DIR_PWR8:
14914   case PPC::DIR_PWR9:
14915   case PPC::DIR_PWR_FUTURE: {
14916     if (!ML)
14917       break;
14918 
14919     if (!DisableInnermostLoopAlign32) {
14920       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
14921       // so that we can decrease cache misses and branch-prediction misses.
14922       // Actual alignment of the loop will depend on the hotness check and other
14923       // logic in alignBlocks.
14924       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
14925         return Align(32);
14926     }
14927 
14928     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
14929 
14930     // For small loops (between 5 and 8 instructions), align to a 32-byte
14931     // boundary so that the entire loop fits in one instruction-cache line.
14932     uint64_t LoopSize = 0;
14933     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
14934       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
14935         LoopSize += TII->getInstSizeInBytes(*J);
14936         if (LoopSize > 32)
14937           break;
14938       }
14939 
14940     if (LoopSize > 16 && LoopSize <= 32)
14941       return Align(32);
14942 
14943     break;
14944   }
14945   }
14946 
14947   return TargetLowering::getPrefLoopAlignment(ML);
14948 }
14949 
14950 /// getConstraintType - Given a constraint, return the type of
14951 /// constraint it is for this target.
14952 PPCTargetLowering::ConstraintType
14953 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
14954   if (Constraint.size() == 1) {
14955     switch (Constraint[0]) {
14956     default: break;
14957     case 'b':
14958     case 'r':
14959     case 'f':
14960     case 'd':
14961     case 'v':
14962     case 'y':
14963       return C_RegisterClass;
14964     case 'Z':
14965       // FIXME: While Z does indicate a memory constraint, it specifically
14966       // indicates an r+r address (used in conjunction with the 'y' modifier
14967       // in the replacement string). Currently, we're forcing the base
14968       // register to be r0 in the asm printer (which is interpreted as zero)
14969       // and forming the complete address in the second register. This is
14970       // suboptimal.
14971       return C_Memory;
14972     }
14973   } else if (Constraint == "wc") { // individual CR bits.
14974     return C_RegisterClass;
14975   } else if (Constraint == "wa" || Constraint == "wd" ||
14976              Constraint == "wf" || Constraint == "ws" ||
14977              Constraint == "wi" || Constraint == "ww") {
14978     return C_RegisterClass; // VSX registers.
14979   }
14980   return TargetLowering::getConstraintType(Constraint);
14981 }
14982 
14983 /// Examine constraint type and operand type and determine a weight value.
14984 /// This object must already have been set up with the operand type
14985 /// and the current alternative constraint selected.
14986 TargetLowering::ConstraintWeight
14987 PPCTargetLowering::getSingleConstraintMatchWeight(
14988     AsmOperandInfo &info, const char *constraint) const {
14989   ConstraintWeight weight = CW_Invalid;
14990   Value *CallOperandVal = info.CallOperandVal;
14991     // If we don't have a value, we can't do a match,
14992     // but allow it at the lowest weight.
14993   if (!CallOperandVal)
14994     return CW_Default;
14995   Type *type = CallOperandVal->getType();
14996 
14997   // Look at the constraint type.
14998   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
14999     return CW_Register; // an individual CR bit.
15000   else if ((StringRef(constraint) == "wa" ||
15001             StringRef(constraint) == "wd" ||
15002             StringRef(constraint) == "wf") &&
15003            type->isVectorTy())
15004     return CW_Register;
15005   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15006     return CW_Register; // just hold 64-bit integers data.
15007   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15008     return CW_Register;
15009   else if (StringRef(constraint) == "ww" && type->isFloatTy())
15010     return CW_Register;
15011 
15012   switch (*constraint) {
15013   default:
15014     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15015     break;
15016   case 'b':
15017     if (type->isIntegerTy())
15018       weight = CW_Register;
15019     break;
15020   case 'f':
15021     if (type->isFloatTy())
15022       weight = CW_Register;
15023     break;
15024   case 'd':
15025     if (type->isDoubleTy())
15026       weight = CW_Register;
15027     break;
15028   case 'v':
15029     if (type->isVectorTy())
15030       weight = CW_Register;
15031     break;
15032   case 'y':
15033     weight = CW_Register;
15034     break;
15035   case 'Z':
15036     weight = CW_Memory;
15037     break;
15038   }
15039   return weight;
15040 }
15041 
// Map an inline-asm register constraint (e.g. "r", "f", "wa", "{vs5}") to a
// (register, register-class) pair for the requested value type VT. A zero
// register with a non-null class means "any register in that class".
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      // With SPE, floating-point values live in GPRs (f32) or SPE register
      // pairs (f64) rather than the classic FPRs.
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
        if (VT == MVT::v4f64 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QFRCRegClass);
        if (VT == MVT::v4f32 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QSRCRegClass);
      }
      break;
    case 'v':
      // Vector operands: prefer QPX classes when the type matches and QPX is
      // available; otherwise fall back to the Altivec register class.
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    // Scalar-in-vector constraints: use the f32-capable subclass on P8+.
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  // If we name a VSX register, we can't defer to the base class because it
  // will not recognize the correct register (their names will be VSL{0-31}
  // and V{0-31} so they won't match). So we match them here.
  // NOTE(review): only characters 1 and 2 are checked (expects "{vsNN}");
  // the leading '{' is presumably guaranteed by the caller — confirm.
  if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
    int VSNum = atoi(Constraint.data() + 3);
    assert(VSNum >= 0 && VSNum <= 63 &&
           "Attempted to access a vsr out of range");
    // VS0-31 overlap the FP registers (VSL names); VS32-63 overlap the
    // Altivec registers (V names).
    if (VSNum < 32)
      return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
    return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
  }
  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}
15137 
15138 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15139 /// vector.  If it is invalid, don't add anything to Ops.
15140 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15141                                                      std::string &Constraint,
15142                                                      std::vector<SDValue>&Ops,
15143                                                      SelectionDAG &DAG) const {
15144   SDValue Result;
15145 
15146   // Only support length 1 constraints.
15147   if (Constraint.length() > 1) return;
15148 
15149   char Letter = Constraint[0];
15150   switch (Letter) {
15151   default: break;
15152   case 'I':
15153   case 'J':
15154   case 'K':
15155   case 'L':
15156   case 'M':
15157   case 'N':
15158   case 'O':
15159   case 'P': {
15160     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15161     if (!CST) return; // Must be an immediate to match.
15162     SDLoc dl(Op);
15163     int64_t Value = CST->getSExtValue();
15164     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15165                          // numbers are printed as such.
15166     switch (Letter) {
15167     default: llvm_unreachable("Unknown constraint letter!");
15168     case 'I':  // "I" is a signed 16-bit constant.
15169       if (isInt<16>(Value))
15170         Result = DAG.getTargetConstant(Value, dl, TCVT);
15171       break;
15172     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
15173       if (isShiftedUInt<16, 16>(Value))
15174         Result = DAG.getTargetConstant(Value, dl, TCVT);
15175       break;
15176     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
15177       if (isShiftedInt<16, 16>(Value))
15178         Result = DAG.getTargetConstant(Value, dl, TCVT);
15179       break;
15180     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
15181       if (isUInt<16>(Value))
15182         Result = DAG.getTargetConstant(Value, dl, TCVT);
15183       break;
15184     case 'M':  // "M" is a constant that is greater than 31.
15185       if (Value > 31)
15186         Result = DAG.getTargetConstant(Value, dl, TCVT);
15187       break;
15188     case 'N':  // "N" is a positive constant that is an exact power of two.
15189       if (Value > 0 && isPowerOf2_64(Value))
15190         Result = DAG.getTargetConstant(Value, dl, TCVT);
15191       break;
15192     case 'O':  // "O" is the constant zero.
15193       if (Value == 0)
15194         Result = DAG.getTargetConstant(Value, dl, TCVT);
15195       break;
15196     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
15197       if (isInt<16>(-Value))
15198         Result = DAG.getTargetConstant(Value, dl, TCVT);
15199       break;
15200     }
15201     break;
15202   }
15203   }
15204 
15205   if (Result.getNode()) {
15206     Ops.push_back(Result);
15207     return;
15208   }
15209 
15210   // Handle standard constraint letters.
15211   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15212 }
15213 
15214 // isLegalAddressingMode - Return true if the addressing mode represented
15215 // by AM is legal for this target, for a load/store of the specified type.
15216 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15217                                               const AddrMode &AM, Type *Ty,
15218                                               unsigned AS, Instruction *I) const {
15219   // PPC does not allow r+i addressing modes for vectors!
15220   if (Ty->isVectorTy() && AM.BaseOffs != 0)
15221     return false;
15222 
15223   // PPC allows a sign-extended 16-bit immediate field.
15224   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15225     return false;
15226 
15227   // No global is ever allowed as a base.
15228   if (AM.BaseGV)
15229     return false;
15230 
15231   // PPC only support r+r,
15232   switch (AM.Scale) {
15233   case 0:  // "r+i" or just "i", depending on HasBaseReg.
15234     break;
15235   case 1:
15236     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
15237       return false;
15238     // Otherwise we have r+r or r+i.
15239     break;
15240   case 2:
15241     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
15242       return false;
15243     // Allow 2*r as r+r.
15244     break;
15245   default:
15246     // No other scales are supported.
15247     return false;
15248   }
15249 
15250   return true;
15251 }
15252 
15253 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15254                                            SelectionDAG &DAG) const {
15255   MachineFunction &MF = DAG.getMachineFunction();
15256   MachineFrameInfo &MFI = MF.getFrameInfo();
15257   MFI.setReturnAddressIsTaken(true);
15258 
15259   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15260     return SDValue();
15261 
15262   SDLoc dl(Op);
15263   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15264 
15265   // Make sure the function does not optimize away the store of the RA to
15266   // the stack.
15267   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15268   FuncInfo->setLRStoreRequired();
15269   bool isPPC64 = Subtarget.isPPC64();
15270   auto PtrVT = getPointerTy(MF.getDataLayout());
15271 
15272   if (Depth > 0) {
15273     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15274     SDValue Offset =
15275         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15276                         isPPC64 ? MVT::i64 : MVT::i32);
15277     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15278                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15279                        MachinePointerInfo());
15280   }
15281 
15282   // Just load the return address off the stack.
15283   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15284   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15285                      MachinePointerInfo());
15286 }
15287 
15288 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15289                                           SelectionDAG &DAG) const {
15290   SDLoc dl(Op);
15291   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15292 
15293   MachineFunction &MF = DAG.getMachineFunction();
15294   MachineFrameInfo &MFI = MF.getFrameInfo();
15295   MFI.setFrameAddressIsTaken(true);
15296 
15297   EVT PtrVT = getPointerTy(MF.getDataLayout());
15298   bool isPPC64 = PtrVT == MVT::i64;
15299 
15300   // Naked functions never have a frame pointer, and so we use r1. For all
15301   // other functions, this decision must be delayed until during PEI.
15302   unsigned FrameReg;
15303   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15304     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15305   else
15306     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15307 
15308   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15309                                          PtrVT);
15310   while (Depth--)
15311     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15312                             FrameAddr, MachinePointerInfo());
15313   return FrameAddr;
15314 }
15315 
15316 // FIXME? Maybe this could be a TableGen attribute on some registers and
15317 // this table could be generated automatically from RegInfo.
15318 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15319                                               const MachineFunction &MF) const {
15320   bool isPPC64 = Subtarget.isPPC64();
15321 
15322   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15323   if (!is64Bit && VT != LLT::scalar(32))
15324     report_fatal_error("Invalid register global variable type");
15325 
15326   Register Reg = StringSwitch<Register>(RegName)
15327                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15328                      .Case("r2", isPPC64 ? Register() : PPC::R2)
15329                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15330                      .Default(Register());
15331 
15332   if (Reg)
15333     return Reg;
15334   report_fatal_error("Invalid register name global variable");
15335 }
15336 
15337 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15338   // 32-bit SVR4 ABI access everything as got-indirect.
15339   if (Subtarget.is32BitELFABI())
15340     return true;
15341 
15342   // AIX accesses everything indirectly through the TOC, which is similar to
15343   // the GOT.
15344   if (Subtarget.isAIXABI())
15345     return true;
15346 
15347   CodeModel::Model CModel = getTargetMachine().getCodeModel();
15348   // If it is small or large code model, module locals are accessed
15349   // indirectly by loading their address from .toc/.got.
15350   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15351     return true;
15352 
15353   // JumpTable and BlockAddress are accessed as got-indirect.
15354   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15355     return true;
15356 
15357   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15358     return Subtarget.isGVIndirectSymbol(G->getGlobal());
15359 
15360   return false;
15361 }
15362 
15363 bool
15364 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15365   // The PowerPC target isn't yet aware of offsets.
15366   return false;
15367 }
15368 
15369 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15370                                            const CallInst &I,
15371                                            MachineFunction &MF,
15372                                            unsigned Intrinsic) const {
15373   switch (Intrinsic) {
15374   case Intrinsic::ppc_qpx_qvlfd:
15375   case Intrinsic::ppc_qpx_qvlfs:
15376   case Intrinsic::ppc_qpx_qvlfcd:
15377   case Intrinsic::ppc_qpx_qvlfcs:
15378   case Intrinsic::ppc_qpx_qvlfiwa:
15379   case Intrinsic::ppc_qpx_qvlfiwz:
15380   case Intrinsic::ppc_altivec_lvx:
15381   case Intrinsic::ppc_altivec_lvxl:
15382   case Intrinsic::ppc_altivec_lvebx:
15383   case Intrinsic::ppc_altivec_lvehx:
15384   case Intrinsic::ppc_altivec_lvewx:
15385   case Intrinsic::ppc_vsx_lxvd2x:
15386   case Intrinsic::ppc_vsx_lxvw4x: {
15387     EVT VT;
15388     switch (Intrinsic) {
15389     case Intrinsic::ppc_altivec_lvebx:
15390       VT = MVT::i8;
15391       break;
15392     case Intrinsic::ppc_altivec_lvehx:
15393       VT = MVT::i16;
15394       break;
15395     case Intrinsic::ppc_altivec_lvewx:
15396       VT = MVT::i32;
15397       break;
15398     case Intrinsic::ppc_vsx_lxvd2x:
15399       VT = MVT::v2f64;
15400       break;
15401     case Intrinsic::ppc_qpx_qvlfd:
15402       VT = MVT::v4f64;
15403       break;
15404     case Intrinsic::ppc_qpx_qvlfs:
15405       VT = MVT::v4f32;
15406       break;
15407     case Intrinsic::ppc_qpx_qvlfcd:
15408       VT = MVT::v2f64;
15409       break;
15410     case Intrinsic::ppc_qpx_qvlfcs:
15411       VT = MVT::v2f32;
15412       break;
15413     default:
15414       VT = MVT::v4i32;
15415       break;
15416     }
15417 
15418     Info.opc = ISD::INTRINSIC_W_CHAIN;
15419     Info.memVT = VT;
15420     Info.ptrVal = I.getArgOperand(0);
15421     Info.offset = -VT.getStoreSize()+1;
15422     Info.size = 2*VT.getStoreSize()-1;
15423     Info.align = Align(1);
15424     Info.flags = MachineMemOperand::MOLoad;
15425     return true;
15426   }
15427   case Intrinsic::ppc_qpx_qvlfda:
15428   case Intrinsic::ppc_qpx_qvlfsa:
15429   case Intrinsic::ppc_qpx_qvlfcda:
15430   case Intrinsic::ppc_qpx_qvlfcsa:
15431   case Intrinsic::ppc_qpx_qvlfiwaa:
15432   case Intrinsic::ppc_qpx_qvlfiwza: {
15433     EVT VT;
15434     switch (Intrinsic) {
15435     case Intrinsic::ppc_qpx_qvlfda:
15436       VT = MVT::v4f64;
15437       break;
15438     case Intrinsic::ppc_qpx_qvlfsa:
15439       VT = MVT::v4f32;
15440       break;
15441     case Intrinsic::ppc_qpx_qvlfcda:
15442       VT = MVT::v2f64;
15443       break;
15444     case Intrinsic::ppc_qpx_qvlfcsa:
15445       VT = MVT::v2f32;
15446       break;
15447     default:
15448       VT = MVT::v4i32;
15449       break;
15450     }
15451 
15452     Info.opc = ISD::INTRINSIC_W_CHAIN;
15453     Info.memVT = VT;
15454     Info.ptrVal = I.getArgOperand(0);
15455     Info.offset = 0;
15456     Info.size = VT.getStoreSize();
15457     Info.align = Align(1);
15458     Info.flags = MachineMemOperand::MOLoad;
15459     return true;
15460   }
15461   case Intrinsic::ppc_qpx_qvstfd:
15462   case Intrinsic::ppc_qpx_qvstfs:
15463   case Intrinsic::ppc_qpx_qvstfcd:
15464   case Intrinsic::ppc_qpx_qvstfcs:
15465   case Intrinsic::ppc_qpx_qvstfiw:
15466   case Intrinsic::ppc_altivec_stvx:
15467   case Intrinsic::ppc_altivec_stvxl:
15468   case Intrinsic::ppc_altivec_stvebx:
15469   case Intrinsic::ppc_altivec_stvehx:
15470   case Intrinsic::ppc_altivec_stvewx:
15471   case Intrinsic::ppc_vsx_stxvd2x:
15472   case Intrinsic::ppc_vsx_stxvw4x: {
15473     EVT VT;
15474     switch (Intrinsic) {
15475     case Intrinsic::ppc_altivec_stvebx:
15476       VT = MVT::i8;
15477       break;
15478     case Intrinsic::ppc_altivec_stvehx:
15479       VT = MVT::i16;
15480       break;
15481     case Intrinsic::ppc_altivec_stvewx:
15482       VT = MVT::i32;
15483       break;
15484     case Intrinsic::ppc_vsx_stxvd2x:
15485       VT = MVT::v2f64;
15486       break;
15487     case Intrinsic::ppc_qpx_qvstfd:
15488       VT = MVT::v4f64;
15489       break;
15490     case Intrinsic::ppc_qpx_qvstfs:
15491       VT = MVT::v4f32;
15492       break;
15493     case Intrinsic::ppc_qpx_qvstfcd:
15494       VT = MVT::v2f64;
15495       break;
15496     case Intrinsic::ppc_qpx_qvstfcs:
15497       VT = MVT::v2f32;
15498       break;
15499     default:
15500       VT = MVT::v4i32;
15501       break;
15502     }
15503 
15504     Info.opc = ISD::INTRINSIC_VOID;
15505     Info.memVT = VT;
15506     Info.ptrVal = I.getArgOperand(1);
15507     Info.offset = -VT.getStoreSize()+1;
15508     Info.size = 2*VT.getStoreSize()-1;
15509     Info.align = Align(1);
15510     Info.flags = MachineMemOperand::MOStore;
15511     return true;
15512   }
15513   case Intrinsic::ppc_qpx_qvstfda:
15514   case Intrinsic::ppc_qpx_qvstfsa:
15515   case Intrinsic::ppc_qpx_qvstfcda:
15516   case Intrinsic::ppc_qpx_qvstfcsa:
15517   case Intrinsic::ppc_qpx_qvstfiwa: {
15518     EVT VT;
15519     switch (Intrinsic) {
15520     case Intrinsic::ppc_qpx_qvstfda:
15521       VT = MVT::v4f64;
15522       break;
15523     case Intrinsic::ppc_qpx_qvstfsa:
15524       VT = MVT::v4f32;
15525       break;
15526     case Intrinsic::ppc_qpx_qvstfcda:
15527       VT = MVT::v2f64;
15528       break;
15529     case Intrinsic::ppc_qpx_qvstfcsa:
15530       VT = MVT::v2f32;
15531       break;
15532     default:
15533       VT = MVT::v4i32;
15534       break;
15535     }
15536 
15537     Info.opc = ISD::INTRINSIC_VOID;
15538     Info.memVT = VT;
15539     Info.ptrVal = I.getArgOperand(1);
15540     Info.offset = 0;
15541     Info.size = VT.getStoreSize();
15542     Info.align = Align(1);
15543     Info.flags = MachineMemOperand::MOStore;
15544     return true;
15545   }
15546   default:
15547     break;
15548   }
15549 
15550   return false;
15551 }
15552 
15553 /// It returns EVT::Other if the type should be determined using generic
15554 /// target-independent logic.
15555 EVT PPCTargetLowering::getOptimalMemOpType(
15556     const MemOp &Op, const AttributeList &FuncAttributes) const {
15557   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15558     // When expanding a memset, require at least two QPX instructions to cover
15559     // the cost of loading the value to be stored from the constant pool.
15560     if (Subtarget.hasQPX() && Op.size() >= 32 &&
15561         (Op.isMemcpy() || Op.size() >= 64) && Op.isAligned(Align(32)) &&
15562         !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
15563       return MVT::v4f64;
15564     }
15565 
15566     // We should use Altivec/VSX loads and stores when available. For unaligned
15567     // addresses, unaligned VSX loads are only fast starting with the P8.
15568     if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15569         (Op.isAligned(Align(16)) ||
15570          ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15571       return MVT::v4i32;
15572   }
15573 
15574   if (Subtarget.isPPC64()) {
15575     return MVT::i64;
15576   }
15577 
15578   return MVT::i32;
15579 }
15580 
15581 /// Returns true if it is beneficial to convert a load of a constant
15582 /// to just the constant itself.
15583 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15584                                                           Type *Ty) const {
15585   assert(Ty->isIntegerTy());
15586 
15587   unsigned BitSize = Ty->getPrimitiveSizeInBits();
15588   return !(BitSize == 0 || BitSize > 64);
15589 }
15590 
15591 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
15592   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15593     return false;
15594   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15595   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15596   return NumBits1 == 64 && NumBits2 == 32;
15597 }
15598 
15599 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
15600   if (!VT1.isInteger() || !VT2.isInteger())
15601     return false;
15602   unsigned NumBits1 = VT1.getSizeInBits();
15603   unsigned NumBits2 = VT2.getSizeInBits();
15604   return NumBits1 == 64 && NumBits2 == 32;
15605 }
15606 
15607 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
15608   // Generally speaking, zexts are not free, but they are free when they can be
15609   // folded with other operations.
15610   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
15611     EVT MemVT = LD->getMemoryVT();
15612     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
15613          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
15614         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
15615          LD->getExtensionType() == ISD::ZEXTLOAD))
15616       return true;
15617   }
15618 
15619   // FIXME: Add other cases...
15620   //  - 32-bit shifts with a zext to i64
15621   //  - zext after ctlz, bswap, etc.
15622   //  - zext after and by a constant mask
15623 
15624   return TargetLowering::isZExtFree(Val, VT2);
15625 }
15626 
15627 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
15628   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
15629          "invalid fpext types");
15630   // Extending to float128 is not free.
15631   if (DestVT == MVT::f128)
15632     return false;
15633   return true;
15634 }
15635 
15636 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
15637   return isInt<16>(Imm) || isUInt<16>(Imm);
15638 }
15639 
15640 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
15641   return isInt<16>(Imm) || isUInt<16>(Imm);
15642 }
15643 
15644 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
15645                                                        unsigned,
15646                                                        unsigned,
15647                                                        MachineMemOperand::Flags,
15648                                                        bool *Fast) const {
15649   if (DisablePPCUnaligned)
15650     return false;
15651 
15652   // PowerPC supports unaligned memory access for simple non-vector types.
15653   // Although accessing unaligned addresses is not as efficient as accessing
15654   // aligned addresses, it is generally more efficient than manual expansion,
15655   // and generally only traps for software emulation when crossing page
15656   // boundaries.
15657 
15658   if (!VT.isSimple())
15659     return false;
15660 
15661   if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
15662     return false;
15663 
15664   if (VT.getSimpleVT().isVector()) {
15665     if (Subtarget.hasVSX()) {
15666       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
15667           VT != MVT::v4f32 && VT != MVT::v4i32)
15668         return false;
15669     } else {
15670       return false;
15671     }
15672   }
15673 
15674   if (VT == MVT::ppcf128)
15675     return false;
15676 
15677   if (Fast)
15678     *Fast = true;
15679 
15680   return true;
15681 }
15682 
15683 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
15684                                                    EVT VT) const {
15685   return isFMAFasterThanFMulAndFAdd(
15686       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
15687 }
15688 
15689 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
15690                                                    Type *Ty) const {
15691   switch (Ty->getScalarType()->getTypeID()) {
15692   case Type::FloatTyID:
15693   case Type::DoubleTyID:
15694     return true;
15695   case Type::FP128TyID:
15696     return EnableQuadPrecision && Subtarget.hasP9Vector();
15697   default:
15698     return false;
15699   }
15700 }
15701 
15702 // Currently this is a copy from AArch64TargetLowering::isProfitableToHoist.
15703 // FIXME: add more patterns which are profitable to hoist.
15704 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
15705   if (I->getOpcode() != Instruction::FMul)
15706     return true;
15707 
15708   if (!I->hasOneUse())
15709     return true;
15710 
15711   Instruction *User = I->user_back();
15712   assert(User && "A single use instruction with no uses.");
15713 
15714   if (User->getOpcode() != Instruction::FSub &&
15715       User->getOpcode() != Instruction::FAdd)
15716     return true;
15717 
15718   const TargetOptions &Options = getTargetMachine().Options;
15719   const Function *F = I->getFunction();
15720   const DataLayout &DL = F->getParent()->getDataLayout();
15721   Type *Ty = User->getOperand(0)->getType();
15722 
15723   return !(
15724       isFMAFasterThanFMulAndFAdd(*F, Ty) &&
15725       isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
15726       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
15727 }
15728 
15729 const MCPhysReg *
15730 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
15731   // LR is a callee-save register, but we must treat it as clobbered by any call
15732   // site. Hence we include LR in the scratch registers, which are in turn added
15733   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
15734   // to CTR, which is used by any indirect call.
15735   static const MCPhysReg ScratchRegs[] = {
15736     PPC::X12, PPC::LR8, PPC::CTR8, 0
15737   };
15738 
15739   return ScratchRegs;
15740 }
15741 
15742 Register PPCTargetLowering::getExceptionPointerRegister(
15743     const Constant *PersonalityFn) const {
15744   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
15745 }
15746 
15747 Register PPCTargetLowering::getExceptionSelectorRegister(
15748     const Constant *PersonalityFn) const {
15749   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
15750 }
15751 
15752 bool
15753 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
15754                      EVT VT , unsigned DefinedValues) const {
15755   if (VT == MVT::v2i64)
15756     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
15757 
15758   if (Subtarget.hasVSX() || Subtarget.hasQPX())
15759     return true;
15760 
15761   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
15762 }
15763 
15764 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
15765   if (DisableILPPref || Subtarget.enableMachineScheduler())
15766     return TargetLowering::getSchedulingPreference(N);
15767 
15768   return Sched::ILP;
15769 }
15770 
// Create a FastISel object for this target; simply delegates to the
// PPC fast-isel factory.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}
15777 
15778 // Override to enable LOAD_STACK_GUARD lowering on Linux.
15779 bool PPCTargetLowering::useLoadStackGuardNode() const {
15780   if (!Subtarget.isTargetLinux())
15781     return TargetLowering::useLoadStackGuardNode();
15782   return true;
15783 }
15784 
15785 // Override to disable global variable loading on Linux.
15786 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
15787   if (!Subtarget.isTargetLinux())
15788     return TargetLowering::insertSSPDeclarations(M);
15789 }
15790 
15791 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
15792                                      bool ForCodeSize) const {
15793   if (!VT.isSimple() || !Subtarget.hasVSX())
15794     return false;
15795 
15796   switch(VT.getSimpleVT().SimpleTy) {
15797   default:
15798     // For FP types that are currently not supported by PPC backend, return
15799     // false. Examples: f16, f80.
15800     return false;
15801   case MVT::f32:
15802   case MVT::f64:
15803   case MVT::ppcf128:
15804     return Imm.isPosZero();
15805   }
15806 }
15807 
15808 // For vector shift operation op, fold
15809 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
15810 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
15811                                   SelectionDAG &DAG) {
15812   SDValue N0 = N->getOperand(0);
15813   SDValue N1 = N->getOperand(1);
15814   EVT VT = N0.getValueType();
15815   unsigned OpSizeInBits = VT.getScalarSizeInBits();
15816   unsigned Opcode = N->getOpcode();
15817   unsigned TargetOpcode;
15818 
15819   switch (Opcode) {
15820   default:
15821     llvm_unreachable("Unexpected shift operation");
15822   case ISD::SHL:
15823     TargetOpcode = PPCISD::SHL;
15824     break;
15825   case ISD::SRL:
15826     TargetOpcode = PPCISD::SRL;
15827     break;
15828   case ISD::SRA:
15829     TargetOpcode = PPCISD::SRA;
15830     break;
15831   }
15832 
15833   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
15834       N1->getOpcode() == ISD::AND)
15835     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
15836       if (Mask->getZExtValue() == OpSizeInBits - 1)
15837         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
15838 
15839   return SDValue();
15840 }
15841 
15842 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
15843   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15844     return Value;
15845 
15846   SDValue N0 = N->getOperand(0);
15847   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15848   if (!Subtarget.isISA3_0() ||
15849       N0.getOpcode() != ISD::SIGN_EXTEND ||
15850       N0.getOperand(0).getValueType() != MVT::i32 ||
15851       CN1 == nullptr || N->getValueType(0) != MVT::i64)
15852     return SDValue();
15853 
15854   // We can't save an operation here if the value is already extended, and
15855   // the existing shift is easier to combine.
15856   SDValue ExtsSrc = N0.getOperand(0);
15857   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
15858       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
15859     return SDValue();
15860 
15861   SDLoc DL(N0);
15862   SDValue ShiftBy = SDValue(CN1, 0);
15863   // We want the shift amount to be i32 on the extswli, but the shift could
15864   // have an i64.
15865   if (ShiftBy.getValueType() == MVT::i64)
15866     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
15867 
15868   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
15869                          ShiftBy);
15870 }
15871 
15872 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
15873   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15874     return Value;
15875 
15876   return SDValue();
15877 }
15878 
15879 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
15880   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15881     return Value;
15882 
15883   return SDValue();
15884 }
15885 
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  // The carry sequences below are built on 64-bit nodes only.
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Matches (zext (setcc Z, C, cc)) with i64 Z where -C fits the addi
  // immediate field. One-use checks keep the zext/setcc from being
  // duplicated by this combine.
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  // i64 result plus a glue result used to thread the carry into the ADDE.
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

  assert(Constant && "Constant Should not be a null pointer.");
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    // When C == 0 the addi is redundant; use Z directly.
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
    }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete  Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    // When C == 0 the addi is redundant; use Z directly.
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
    }
  }

  return SDValue();
}
15972 
15973 // Transform
15974 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
15975 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
15976 // In this case both C1 and C2 must be known constants.
15977 // C1+C2 must fit into a 34 bit signed integer.
15978 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
15979                                           const PPCSubtarget &Subtarget) {
15980   if (!Subtarget.isUsingPCRelativeCalls())
15981     return SDValue();
15982 
15983   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
15984   // If we find that node try to cast the Global Address and the Constant.
15985   SDValue LHS = N->getOperand(0);
15986   SDValue RHS = N->getOperand(1);
15987 
15988   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
15989     std::swap(LHS, RHS);
15990 
15991   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
15992     return SDValue();
15993 
15994   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
15995   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
15996   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
15997 
15998   // Check that both casts succeeded.
15999   if (!GSDN || !ConstNode)
16000     return SDValue();
16001 
16002   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16003   SDLoc DL(GSDN);
16004 
16005   // The signed int offset needs to fit in 34 bits.
16006   if (!isInt<34>(NewOffset))
16007     return SDValue();
16008 
16009   // The new global address is a copy of the old global address except
16010   // that it has the updated Offset.
16011   SDValue GA =
16012       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16013                                  NewOffset, GSDN->getTargetFlags());
16014   SDValue MatPCRel =
16015       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16016   return MatPCRel;
16017 }
16018 
16019 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16020   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16021     return Value;
16022 
16023   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16024     return Value;
16025 
16026   return SDValue();
16027 }
16028 
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
    return SDValue();

  // A plain truncate keeps the low 64 bits: v2i64 element 1 on big-endian,
  // element 0 on little-endian.
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    // View the f128 as v2i64 and extract the desired half directly, avoiding
    // a store/reload of the value.
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
  }
  return SDValue();
}
16080 
// Rewrite (mul x, +/-(2^N +/- 1)) as shift + add/sub when the cost model
// says the expanded sequence is faster than the multiply.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Only combine multiplies by a constant (or splatted constant vector).
  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  // Per-CPU profitability of the shift+add/sub sequence vs. the multiply.
  // IsAddOne: matched 2^N + 1 (the add form); IsNeg: the constant is negative.
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
      // scalar        4       1      1
      // vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR_FUTURE:
      //  type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle RATIO of related operations are showed as a table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    if (!IsNeg)
      return Res;

    // Negative constant: negate the add result (0 - Res).
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);

  } else {
    return SDValue();
  }
}
16164 
16165 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16166   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
16167   if (!Subtarget.is64BitELFABI())
16168     return false;
16169 
16170   // If not a tail call then no need to proceed.
16171   if (!CI->isTailCall())
16172     return false;
16173 
16174   // If sibling calls have been disabled and tail-calls aren't guaranteed
16175   // there is no reason to duplicate.
16176   auto &TM = getTargetMachine();
16177   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16178     return false;
16179 
16180   // Can't tail call a function called indirectly, or if it has variadic args.
16181   const Function *Callee = CI->getCalledFunction();
16182   if (!Callee || Callee->isVarArg())
16183     return false;
16184 
16185   // Make sure the callee and caller calling conventions are eligible for tco.
16186   const Function *Caller = CI->getParent()->getParent();
16187   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16188                                            CI->getCallingConv()))
16189       return false;
16190 
16191   // If the function is local then we have a good chance at tail-calling it
16192   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16193 }
16194 
16195 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16196   if (!Subtarget.hasVSX())
16197     return false;
16198   if (Subtarget.hasP9Vector() && VT == MVT::f128)
16199     return true;
16200   return VT == MVT::f32 || VT == MVT::f64 ||
16201     VT == MVT::v4f32 || VT == MVT::v2f64;
16202 }
16203 
16204 bool PPCTargetLowering::
16205 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16206   const Value *Mask = AndI.getOperand(1);
16207   // If the mask is suitable for andi. or andis. we should sink the and.
16208   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16209     // Can't handle constants wider than 64-bits.
16210     if (CI->getBitWidth() > 64)
16211       return false;
16212     int64_t ConstVal = CI->getZExtValue();
16213     return isUInt<16>(ConstVal) ||
16214       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16215   }
16216 
16217   // For non-constant masks, we can always use the record-form and.
16218   return true;
16219 }
16220 
// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  // VABSD only exists for the byte/halfword/word vector types.
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, if it's known to be positive (as signed
    // integer) due to zero-extended inputs.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      // Flag operand 0: the zero-extended form (see the transforms above).
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For type v4i32, it can be optimized with xvnegsp + vabsduw
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      // Flag operand 1: the signed v4i32 form (see the transforms above).
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}
16263 
// For type v4i32/v8i16/v16i8, transform
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();

  // Shape check: a compare selecting between two subtractions.
  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
      FalseOpnd.getOpcode() != ISD::SUB)
    return SDValue();

  // ABSD only available for type v4i32/v8i16/v16i8
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  // At least to save one more dependent computation
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // Can only handle unsigned comparison here
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETUGT:
  case ISD::SETUGE:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
    // Normalize the "less" forms to the "greater" forms by swapping the
    // two subtraction operands of the select.
    std::swap(TrueOpnd, FalseOpnd);
    break;
  }

  SDValue CmpOpnd1 = Cond.getOperand(0);
  SDValue CmpOpnd2 = Cond.getOperand(1);

  // SETCC CmpOpnd1 CmpOpnd2 cond
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
      FalseOpnd.getOperand(1) == CmpOpnd1) {
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                       CmpOpnd1, CmpOpnd2,
                       DAG.getTargetConstant(0, dl, MVT::i32));
  }

  return SDValue();
}
16326